source "drivers/infiniband/ulp/ipoib/Kconfig"
+source "drivers/infiniband/ulp/xsigo/Kconfig"
+
source "drivers/infiniband/ulp/srp/Kconfig"
source "drivers/infiniband/ulp/srpt/Kconfig"
obj-$(CONFIG_INFINIBAND_IPOIB) += ipoib/
+obj-$(CONFIG_INFINIBAND_XSCORE) += xsigo/
obj-$(CONFIG_INFINIBAND_SRP) += srp/
obj-$(CONFIG_INFINIBAND_SRPT) += srpt/
obj-$(CONFIG_INFINIBAND_ISER) += iser/
--- /dev/null
+source "drivers/infiniband/ulp/xsigo/xscore/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xsvnic/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xsvhba/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xve/Kconfig"
--- /dev/null
+obj-$(CONFIG_INFINIBAND_XSCORE) += xscore/
+obj-$(CONFIG_INFINIBAND_XSVNIC) += xsvnic/
+obj-$(CONFIG_INFINIBAND_XSVHBA) += xsvhba/
+obj-$(CONFIG_INFINIBAND_XVE) += xve/
--- /dev/null
+config INFINIBAND_XSCORE
+ tristate "Xsigo fabric support for InfiniBand devices"
+ depends on NETDEVICES && INET && INFINIBAND
+ ---help---
+ Support for the Xsigo fabric for InfiniBand devices. This enables
+ InfiniBand data transfer through a Xsigo director.
--- /dev/null
+obj-$(CONFIG_INFINIBAND_XSCORE) := xscore.o
+xscore-y := xscore_impl.o xs_ud.o xscore_api.o xsmp.o \
+ xscore_stats.o xscore_uadm.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/include
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/radix-tree.h>
+#include <linux/notifier.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+
+#include "xg_heap.h"
+
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT 6
+#else
+#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
+#endif
+#define RADIX_TREE_TAGS 2
+
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1)
+
+#define RADIX_TREE_TAG_LONGS \
+ ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct radix_tree_node {
+ unsigned int count;
+ void *slots[RADIX_TREE_MAP_SIZE];
+ unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
+};
+
+struct radix_tree_path {
+ struct radix_tree_node *node;
+ int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
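+/*
+ * height_to_maxindex[h] holds the largest index a tree of height h can
+ * address; it is filled in by radix_tree_init_maxindex().
+ */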
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
+
+/*
+ * Radix tree node cache.
+ */
+static kmem_cache_t *radix_tree_node_cachep;
+
+/*
+ * Per-cpu pool of preloaded nodes
+*/
+struct radix_tree_preload {
+ int nr;
+ struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
+};
+
+vmk_moduleid moduleid;
+vmk_heapid heapid;
+
+void memory_thread_init(void)
+{
+ moduleid = vmk_modulestacktop();
+ pr_info("module id = %d\n", moduleid);
+ heapid = vmk_modulegetheapid(moduleid);
+}
+
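+/*
+ * Page-aligned allocation from the module heap, standing in for
+ * alloc_pages() on this platform.
+ */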
+void *ib_alloc_pages(unsigned int flags, unsigned int order)
+{
+ void *vaddr;
+ unsigned long size = (VMK_PAGE_SIZE << order);
+
+ vaddr = vmk_heapalign(heapid, size, PAGE_SIZE);
+ if (vaddr == NULL)
+ return NULL;
+
+ return vaddr;
+}
+EXPORT_SYMBOL(ib_alloc_pages);
+
+void ib_free_pages(void *ptr, int order)
+{
+ vmk_heapfree(heapid, ptr);
+}
+EXPORT_SYMBOL(ib_free_pages);
+
+void *ib_kmalloc(size_t size, gfp_t flags)
+{
+ return vmk_heapalloc(heapid, size);
+}
+EXPORT_SYMBOL(ib_kmalloc);
+
+void ib_free(void *ptr)
+{
+ vmk_heapfree(heapid, ptr);
+}
+EXPORT_SYMBOL(ib_free);
+
+static int __init ib_kompat_init(void)
+{
+ radix_tree_init();
+ memory_thread_init();
+ return 0;
+}
+
+static void __exit ib_kompat_cleanup(void)
+{
+ radix_tree_destroy();
+}
+
+int xg_vmk_kompat_init(void)
+{
+ return ib_kompat_init();
+}
+
+void xg_vmk_kompat_cleanup(void)
+{
+ return ib_kompat_cleanup();
+}
+
+/*
+ * __iowrite64_copy() is missing in this environment, so provide it here.
+ */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+ u64 __iomem *dst = to;
+ const u64 *src = from;
+ const u64 *end = src + count;
+
+ while (src < end)
+ __raw_writeq(*src++, dst++);
+}
+EXPORT_SYMBOL(__iowrite64_copy);
+
+/*
+ * memmove() implementation taken from vmklinux26/linux/lib/string.c
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+ char *tmp;
+ const char *s;
+
+ if (dest <= src) {
+ tmp = dest;
+ s = src;
+ while (count--)
+ *tmp++ = *s++;
+ } else {
+ tmp = dest;
+ tmp += count;
+ s = src;
+ s += count;
+ while (count--)
+ *--tmp = *--s;
+ }
+ return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+/* functions from radix-tree.c */
+static void
+radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
+{
+ memset(node, 0, sizeof(struct radix_tree_node));
+}
+
+static __init unsigned long __maxindex(unsigned int height)
+{
+ unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
+ unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
+
+ if (tmp >= RADIX_TREE_INDEX_BITS)
+ index = ~0UL;
+ return index;
+}
+
+static __init void radix_tree_init_maxindex(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+ height_to_maxindex[i] = __maxindex(i);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int radix_tree_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void __init radix_tree_init(void)
+{
+ radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
+ sizeof(struct
+ radix_tree_node), 0,
+ SLAB_PANIC,
+ radix_tree_node_ctor, NULL);
+ radix_tree_init_maxindex();
+}
--- /dev/null
+/*
+ * This software is available to you under the OpenIB.org BSD license,
+ * available in the LICENSE.TXT file accompanying this software.
+ * These details are also available at <http://openib.org/license.html>.
+ *
+ */
+
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/workqueue.h>
+#include <linux/log2.h>
+#include <linux/byteorder/swab.h>
+#include <linux/mutex.h>
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#define for_each_netdev(a, dev) for ((dev) = dev_base;\
+ (dev) != NULL;\
+ (dev) = (dev)->next)
+
+void *memmove(void *dest, const void *src, size_t count);
+
+#ifndef bool
+#define bool int
+#define true 1
+#define false 0
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef _XS_COMPAT_H
+#define _XS_COMPAT_H
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/version.h>
+#include <linux/idr.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
+
+/*
+ * Workqueue API backport for kernels older than 2.6.20.
+ * ESX 4.0 already has these changes, so nothing is needed here.
+ */
+
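+/* Opaque handle identifying an XSMP session, passed back to clients. */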
+typedef void *xsmp_cookie_t;
+
+#if defined(XSIGOPSEUDOFLAG)
+/*
+ * ESX-5.1 pseudo device registration.
+ */
+static inline void xg_preregister_pseudo_device(struct net_device *netdev)
+{
+ if (netdev->pdev) {
+ netdev->pdev->netdev = NULL;
+ netdev->pdev_pseudo = netdev->pdev;
+ netdev->pdev = NULL;
+ }
+}
+
+static inline void xg_setup_pseudo_device(struct net_device *netdev,
+ struct ib_device *hca)
+{
+ netdev->features |= NETIF_F_PSEUDO_REG;
+ SET_NETDEV_DEV(netdev, hca->dma_device);
+}
+#else
+static inline void xg_preregister_pseudo_device(struct net_device *netdev)
+{
+}
+
+static inline void xg_setup_pseudo_device(struct net_device *netdev,
+ struct ib_device *hca)
+{
+}
+#endif
+
+static inline void xg_set_netdev_dev(struct net_device *netdev,
+ struct ib_device *hca)
+{
+}
+
+#ifndef BACKPORT_LINUX_WORKQUEUE_TO_2_6_19
+
+#endif
+
+#if !defined(XG_FRAG_SIZE_PRESENT)
+
+static inline unsigned int skb_frag_size(const skb_frag_t *frag)
+{
+ return frag->size;
+}
+
+#endif
+
+#if !defined(XG_FRAG_PAGE_PRESENT)
+
+static inline struct page *skb_frag_page(const skb_frag_t *frag)
+{
+ return frag->page;
+}
+
+#endif
+
+#include <scsi/scsi_cmnd.h>
+
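+/*
+ * SCSI_STRUCT_CHANGES selects between the newer scsi_cmnd layout, where the
+ * scatterlist lives in cmd->sdb, and the older request_buffer/use_sg fields.
+ */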
+#if defined(SCSI_STRUCT_CHANGES)
+
+static inline void scsi_set_buffer(struct scsi_cmnd *cmd, void *buffer)
+{
+ cmd->sdb.table.sgl = buffer;
+}
+
+static inline void set_scsi_sg_count(struct scsi_cmnd *cmd, int cnt)
+{
+ cmd->sdb.table.nents = cnt;
+}
+
+#else /* ! defined(SCSI_STRUCT_CHANGES) */
+
+static inline void scsi_set_buffer(struct scsi_cmnd *cmd, void *buffer)
+{
+ cmd->request_buffer = buffer;
+}
+
+#define set_scsi_sg_count(cmd, cnt) ((cmd)->use_sg = (cnt))
+
+#ifndef scsi_sg_count
+
+#define scsi_sg_count(cmd) ((cmd)->use_sg)
+#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer)
+#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
+
+static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid)
+{
+ cmd->resid = resid;
+}
+
+static inline int scsi_get_resid(struct scsi_cmnd *cmd)
+{
+ return cmd->resid;
+}
+
+#define scsi_for_each_sg(cmd, sg, nseg, __i) \
+ for_each_sg(scsi_sglist(cmd), sg, nseg, __i)
+
+#endif
+
+#ifndef sg_page
+#define sg_page(x) ((x)->page)
+#endif
+
+#endif /* ! defined(SCSI_STRUCT_CHANGES) */
+
+#if defined(SCSI_TIMEOUT_CHANGES)
+#define timeout_per_command(cmd) ((cmd)->request->timeout)
+#define vhba_reset_scsi_timeout(cmd, jiffies) /* NOTHING */
+#else /* ! defined(SCSI_TIMEOUT_CHANGES) */
+#define timeout_per_command(cmd) ((cmd)->timeout_per_command)
+#define vhba_reset_scsi_timeout(cmd, jiffies) \
+do { \
+ if ((cmd)->eh_timeout.function) \
+ mod_timer(&(cmd)->eh_timeout, jiffies); \
+} while (0)
+#endif /* ! defined(SCSI_TIMEOUT_CHANGES) */
+
+#define SET_OWNER(file) do { } while (0)
+
+/*
+ * 2.6.31 added netdev_ops to struct net_device.
+ */
+#define SET_NETDEV_OPS(netdev, ops) \
+ ((netdev)->netdev_ops = (ops))
+
+#if !defined(HAS_SKB_ACCESS_FUNCTIONS)
+
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+ return (struct tcphdr *)skb_transport_header(skb);
+}
+
+static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+{
+ return skb->h.th->doff << 2;
+}
+
+static inline unsigned int tcp_optlen(const struct sk_buff *skb)
+{
+ return (skb->h.th->doff - 5) * 4;
+}
+
+static inline void skb_reset_network_header(struct sk_buff *skb)
+{
+ skb->nh.raw = skb->data;
+}
+#endif
+
+/*
+ * Backported NAPI changes; ESX 4.0 already supports them, so this is a no-op.
+ */
+
+static inline void napi_update_budget(struct napi_struct *n, int cnt)
+{
+}
+
+#ifndef NETIF_F_GRO
+#define NETIF_F_GRO 0
+#endif
+
+#ifndef NETIF_F_GSO
+#define NETIF_F_GSO 0
+#endif
+
+#ifndef IFF_SLAVE_INACTIVE
+#define IFF_SLAVE_INACTIVE 0x4
+#endif
+
+#ifndef CHECKSUM_PARTIAL
+#define CHECKSUM_PARTIAL CHECKSUM_HW
+#endif
+
+#if !defined(LLE) && defined(IB_VERBS_H)
+#if defined(NATIVE_IB_STACK_CHECK)
+enum rdma_link_layer {
+ IB_LINK_LAYER_UNSPECIFIED,
+ IB_LINK_LAYER_INFINIBAND,
+ IB_LINK_LAYER_ETHERNET,
+};
+
+static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
+{
+ memset(gid->raw, 0, 16);
+ *((u32 *) gid->raw) = cpu_to_be32(0xfe800000);
+ if (vid) {
+ gid->raw[12] = vid & 0xff;
+ gid->raw[11] = vid >> 8;
+ } else {
+ gid->raw[12] = 0xfe;
+ gid->raw[11] = 0xff;
+ }
+
+ memcpy(gid->raw + 13, mac + 3, 3);
+ memcpy(gid->raw + 8, mac, 3);
+ gid->raw[8] ^= 2;
+}
+#endif /* NATIVE_IB_STACK_CHECK */
+
+static inline enum rdma_link_layer rdma_port_link_layer(struct ib_device
+ *device, u8 port_num)
+{
+ return IB_LINK_LAYER_INFINIBAND;
+}
+
+#endif /* ! defined(LLE) */
+
+#if defined(LLE) && defined(RDMA_PORT_LINK_LAYER_CHANGES)
+#define rdma_port_link_layer rdma_port_get_link_layer
+#endif
+
+#define PROC_ROOT 0
+
+extern int xscore_uadm_init(void);
+extern void xscore_uadm_destroy(void);
+extern void xscore_uadm_receive(xsmp_cookie_t xsmp_hndl, u8 *data, int len);
+
+/* required for IB_REV_106 */
+#if !defined(IB_REV_106_CHECK) || !defined(IB_REV_110_CHECK)
+#define xg_vmk_kompat_init() do {} while (0)
+#define xg_vmk_kompat_cleanup() do {} while (0)
+#else
+extern int xg_vmk_kompat_init(void);
+extern void xg_vmk_kompat_cleanup(void);
+#endif
+
+#define VMWARE_RESERVED_KEYS ""
+#define SG_OFFSET(sg) (sg->offset)
+#define SG_LENGTH(sg) (sg->length)
+#define SG_NEXT(sg) (sg++)
+#define SG_RESET(sg) {}
+#define ib_sa_force_update(client, dev, attr, value, mode) do {} while (0)
+
+#define GET_NLINK(file) ((file)->nlink)
+#define SET_NLINK(file, value) ((file)->nlink = (value))
+
+/*
+ * 8k IBMTU support
+ */
+enum xg_ib_mtu {
+ IB_MTU_8192 = 6
+};
+
+static inline int xg_ib_mtu_enum_to_int(enum ib_mtu _mtu)
+{
+ int mtu = (int)_mtu;
+
+ switch (mtu) {
+ case IB_MTU_256:
+ return 256;
+ case IB_MTU_512:
+ return 512;
+ case IB_MTU_1024:
+ return 1024;
+ case IB_MTU_2048:
+ return 2048;
+ case IB_MTU_4096:
+ return 4096;
+ case IB_MTU_8192:
+ return 8192;
+ default:
+ return -1;
+ }
+}
+#endif /* _XS_COMPAT_H */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the UD send/receive support.
+ */
+
+#include "xscore_priv.h"
+
+#define XS_UD_RECV_WQE 16
+#define XS_UD_SEND_WQE 8
+
+#define MAX_UD_RX_BUF_SIZE 1024
+#define MAX_UD_TX_BUF_SIZE 1024
+
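+/*
+ * A work request id encodes the ring index in its low 16 bits and the
+ * direction (send/receive) in bits 16-17; see handle_wc().
+ */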
+#define XSUD_RECV_WRID 0x10000
+#define XSUD_SEND_WRID 0x20000
+#define XSUD_WRID_MASK 0x30000
+
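+/*
+ * UD messages below are sent to the IBA multicast QPN (0xFFFFFF) using an
+ * address handle built on the multicast LID base 0xC000.
+ */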
+#define QP_DEF_QKEY 0x11111111
+#define QP_MULTICAST_QPN 0xFFFFFF
+#define QP_MCAST_LID 0xC000
+
+struct ud_tx_buf {
+ void *vaddr;
+ u64 mapping;
+ struct ib_ah *ah;
+ int len;
+};
+
+struct ud_rx_buf {
+ void *vaddr;
+ int len;
+ u64 mapping;
+};
+
+/*
+ * Per-port context information for the UD QP
+ */
+struct ib_ud_ctx {
+ struct xscore_port *pinfop;
+ struct ib_cq *cq;
+ struct ib_qp *qp;
+ struct ud_rx_buf rxbuf[XS_UD_RECV_WQE];
+ struct ud_tx_buf txbuf[XS_UD_SEND_WQE];
+ int next_xmit;
+ void (*callback)(void *arg, void *msg, int len);
+ void *client_arg;
+};
+
+static int xs_ud_post_recv(struct ib_ud_ctx *ctx, int offset, int n)
+{
+ struct xscore_port *pinfop = ctx->pinfop;
+ struct ib_device *ca = pinfop->xs_dev->device;
+ struct ib_sge list = {
+ .lkey = pinfop->xs_dev->mr->lkey
+ };
+ struct ib_recv_wr wr = {
+ .sg_list = &list,
+ .num_sge = 1,
+ };
+ struct ib_recv_wr *bad_wr;
+ int i, ret;
+ void *addr;
+ u64 mapping;
+
+ for (i = 0; i < n; ++i, ++offset) {
+ struct ud_rx_buf *rbuf = &ctx->rxbuf[offset];
+
+ addr = kmalloc(MAX_UD_RX_BUF_SIZE, GFP_ATOMIC);
+ if (!addr) {
+ ret = -ENOMEM;
+ goto partial_failure;
+ }
+ rbuf->vaddr = addr;
+ /*
+ * Map the buffer and give the bus address
+ */
+ mapping = ib_dma_map_single(ca, addr, MAX_UD_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+ ret = -EIO;
+ goto partial_failure;
+ }
+ rbuf->mapping = mapping;
+ list.addr = (unsigned long)mapping;
+ list.length = MAX_UD_RX_BUF_SIZE;
+ wr.wr_id = (int)(offset | XSUD_RECV_WRID);
+ ret = ib_post_recv(ctx->qp, &wr, &bad_wr);
+ if (ret) {
+ pr_info("xs_ud_post_recv: ib_post_recv");
+ pr_info(" error, i %d, ret = %d\n", i, ret);
+ goto partial_failure;
+ }
+ }
+ return 0;
+partial_failure:
+ for (; i >= 0; i--, offset--) {
+ struct ud_rx_buf *rbuf = &ctx->rxbuf[offset];
+
+ if (rbuf->mapping) {
+ ib_dma_unmap_single(ca, rbuf->mapping,
+ MAX_UD_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ rbuf->mapping = 0;
+ }
+ if (rbuf->vaddr != NULL) {
+ kfree(rbuf->vaddr);
+ rbuf->vaddr = 0;
+ }
+ }
+ return ret;
+}
+
+static void handle_wc(struct ib_ud_ctx *udp, struct ib_wc *wcp)
+{
+ void *buf;
+ struct ib_device *ca = udp->pinfop->xs_dev->device;
+ struct ud_tx_buf *tbuf;
+ struct ud_rx_buf *rbuf;
+ int ind = (int)wcp->wr_id & 0xFFFF;
+ int wrid = (int)wcp->wr_id & XSUD_WRID_MASK;
+
+ switch (wrid) {
+ case XSUD_SEND_WRID:
+ tbuf = &udp->txbuf[ind];
+ ib_destroy_ah(tbuf->ah);
+ ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len,
+ DMA_TO_DEVICE);
+ kfree(tbuf->vaddr);
+ tbuf->vaddr = 0;
+ tbuf->ah = 0;
+ tbuf->mapping = 0;
+ break;
+ case XSUD_RECV_WRID:
+ rbuf = &udp->rxbuf[ind];
+ ib_dma_unmap_single(ca, rbuf->mapping, MAX_UD_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ buf = rbuf->vaddr;
+ /*
+ * Allocate new buffer in its place
+ */
+ if ((wcp->status == 0) && udp->callback) {
+ (void)xs_ud_post_recv(udp, ind, 1);
+ /*
+ * Get rid of the GRH header
+ */
+ udp->callback(udp->client_arg,
+ buf + sizeof(struct ib_grh),
+ wcp->byte_len - sizeof(struct ib_grh));
+ } else
+ kfree(buf);
+ break;
+ default:
+ pr_warn("xscore: UD unknown WR id\n");
+ break;
+ }
+}
+
+static void ud_compl_handler(struct ib_cq *cq, void *cq_context)
+{
+ struct ib_ud_ctx *udp = cq_context;
+ struct ib_wc wc[1];
+ int i, n;
+
+ /*
+ * Enable interrupts back again
+ */
+ (void)ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+
+ while ((n = ib_poll_cq(cq, 1, wc)) > 0) {
+ for (i = 0; i < n; i++)
+ handle_wc(udp, &wc[i]);
+ }
+}
+
+int xs_ud_send_msg(struct xscore_port *pinfop, uint8_t *macp, void *msgp,
+ int len, int flags)
+{
+ struct ib_ud_ctx *udp = pinfop->ib_ud_ctx;
+ struct ib_device *ca = pinfop->xs_dev->device;
+ u64 mapping;
+ void *addr = msgp;
+ int i;
+ struct ib_sge list = {
+ .length = len,
+ .lkey = pinfop->xs_dev->mr->lkey
+ };
+ struct ib_send_wr wr = {
+ .sg_list = &list,
+ .num_sge = 1,
+ .opcode = IB_WR_SEND,
+ .send_flags = IB_SEND_SIGNALED,
+ .wr = {
+ .ud = {
+ .remote_qpn = QP_MULTICAST_QPN,
+ .remote_qkey = QP_DEF_QKEY}
+ }
+ };
+ struct ib_send_wr *bad_wr;
+ union ib_gid dgid;
+ struct ib_ah_attr ah_attr = {
+ .dlid = QP_MCAST_LID,
+ .sl = 0,
+ .src_path_bits = 0,
+ .port_num = pinfop->port_num
+ };
+ struct ud_tx_buf *tbuf;
+ int ret;
+
+ i = udp->next_xmit;
+ tbuf = &udp->txbuf[i];
+ if (tbuf->vaddr)
+ return -ENOBUFS;
+ if (flags & XS_UD_COPY_MSG) {
+ addr = kmalloc(len + 40, GFP_KERNEL);
+ if (!addr)
+ return -ENOMEM;
+ memcpy(addr, msgp, len);
+ }
+ mapping = ib_dma_map_single(ca, addr, len + 40, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+ if (flags & XS_UD_COPY_MSG)
+ kfree(addr);
+ return -EIO;
+ }
+ tbuf->vaddr = addr;
+ tbuf->mapping = mapping;
+ tbuf->len = len + 40;
+ udp->next_xmit = (i + 1) % XS_UD_SEND_WQE;
+ list.addr = mapping;
+ wr.wr_id = i | XSUD_SEND_WRID;
+ /*
+ * Create an address handle and transmit the message
+ */
+ memset(&dgid, 0, sizeof(dgid));
+ /*
+ * Send it to the all-nodes IPv6 multicast address
+ * ff02::1
+ */
+ *((u32 *) dgid.raw) = cpu_to_be32(0xff020000);
+ dgid.raw[15] = 1;
+
+ ah_attr.grh.hop_limit = 1;
+ ah_attr.grh.dgid = dgid;
+ ah_attr.ah_flags = IB_AH_GRH;
+ tbuf->ah = ib_create_ah(pinfop->xs_dev->pd, &ah_attr);
+ if (IS_ERR(tbuf->ah)) {
+ XDDS_ERROR("%s: ib_create_ah failed, port: %d, index: %d\n",
+ __func__, pinfop->port_num, i);
+ ret = PTR_ERR(tbuf->ah);
+ goto err;
+ }
+ wr.wr.ud.ah = tbuf->ah;
+ ret = ib_post_send(udp->qp, &wr, &bad_wr);
+ if (ret)
+ goto err1;
+ return 0;
+err1:
+ ib_destroy_ah(tbuf->ah);
+ tbuf->ah = 0;
+err:
+ tbuf->vaddr = 0;
+ ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len, DMA_TO_DEVICE);
+ tbuf->mapping = 0;
+ if (flags & XS_UD_COPY_MSG)
+ kfree(addr);
+ return ret;
+}
+
+int xs_ud_create(struct xscore_port *pinfop,
+ void (*callback)(void *, void *, int), void *arg)
+{
+ int ret = 0;
+ struct ib_ud_ctx *udp;
+ struct ib_qp_init_attr init_attr = {
+ .cap = {
+ .max_send_wr = XS_UD_SEND_WQE + 1,
+ .max_recv_wr = XS_UD_RECV_WQE + 1,
+ .max_send_sge = 1,
+ .max_recv_sge = 1},
+ .qp_type = IB_QPT_UD,
+ };
+ struct ib_qp_attr qp_attr = {
+ .qp_state = IB_QPS_INIT,
+ .pkey_index = 0,
+ .port_num = pinfop->port_num,
+ .qkey = QP_DEF_QKEY
+ };
+
+ /*
+ * Only do this once per port
+ */
+ if (pinfop->ib_ud_ctx != NULL)
+ return 0;
+
+ XDDS_INFO("%s: Creating guid: 0x%llx\n", __func__, pinfop->guid);
+
+ udp = kzalloc(sizeof(*udp), GFP_KERNEL);
+ if (!udp)
+ return -ENOMEM;
+ udp->pinfop = pinfop;
+ udp->callback = callback;
+ udp->client_arg = arg;
+
+ pinfop->ib_ud_ctx = udp;
+ /*
+ * Create completion Q for send and receive (A single one is enough)
+ */
+ udp->cq = ib_create_cq(pinfop->xs_dev->device,
+ ud_compl_handler, NULL,
+ (void *)udp, XS_UD_RECV_WQE + XS_UD_SEND_WQE, 0);
+ if (IS_ERR(udp->cq)) {
+ ret = PTR_ERR(udp->cq);
+ XDDS_ERROR("%s: b_create_cq, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_0;
+ }
+
+ init_attr.send_cq = udp->cq;
+ init_attr.recv_cq = udp->cq;
+
+ udp->qp = ib_create_qp(pinfop->xs_dev->pd, &init_attr);
+ if (IS_ERR(udp->qp)) {
+ ret = PTR_ERR(udp->qp);
+ XDDS_ERROR("%s: b_create_qp, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_1;
+ }
+ /*
+ * Now move the QP to RTS state and post recvs
+ */
+ ret = ib_modify_qp(udp->qp, &qp_attr,
+ IB_QP_STATE |
+ IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY);
+ if (ret) {
+ XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_2;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTR;
+
+ ret = ib_modify_qp(udp->qp, &qp_attr, IB_QP_STATE);
+ if (ret) {
+ XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_2;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ qp_attr.sq_psn = 0;
+
+ ret = ib_modify_qp(udp->qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+ if (ret) {
+ XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_2;
+ }
+ /*
+ * Now post recvs
+ */
+ ret = xs_ud_post_recv(udp, 0, XS_UD_RECV_WQE);
+ if (ret) {
+ XDDS_ERROR("%s: xs_ud_post_recv, port: %d, ret : %d\n",
+ __func__, pinfop->port_num, ret);
+ goto err_2;
+ }
+
+ (void)ib_req_notify_cq(udp->cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+
+ return 0;
+err_2:
+ ib_destroy_qp(udp->qp);
+err_1:
+ ib_destroy_cq(udp->cq);
+err_0:
+ kfree(udp);
+ pinfop->ib_ud_ctx = 0;
+ return ret;
+}
+
+void xs_ud_destroy(struct xscore_port *pinfop)
+{
+ struct ib_ud_ctx *udp = pinfop->ib_ud_ctx;
+ struct ib_device *ca = pinfop->xs_dev->device;
+ int i;
+
+ if (!udp)
+ return;
+ ib_destroy_qp(udp->qp);
+ ib_destroy_cq(udp->cq);
+ /*
+ * Flush out all buffers
+ */
+ for (i = 0; i < XS_UD_RECV_WQE; i++) {
+ struct ud_rx_buf *rbuf = &udp->rxbuf[i];
+
+ if (rbuf->mapping)
+ ib_dma_unmap_single(ca, rbuf->mapping,
+ MAX_UD_RX_BUF_SIZE,
+ DMA_FROM_DEVICE);
+ if (rbuf->vaddr != NULL)
+ kfree(rbuf->vaddr);
+ }
+ for (i = 0; i < XS_UD_SEND_WQE; i++) {
+ struct ud_tx_buf *tbuf = &udp->txbuf[i];
+
+ if (tbuf->mapping)
+ ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len,
+ DMA_TO_DEVICE);
+ if (tbuf->vaddr != NULL)
+ kfree(tbuf->vaddr);
+ }
+ kfree(udp);
+}
+
+void xs_ud_free(void *msg)
+{
+ void *p = msg - sizeof(struct ib_grh);
+
+ XDDS_FUNCTION("%s: Freeing buffer: %p\n", __func__, p);
+ kfree(p);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XS_VERSIONS_H_INCLUDED__
+#define __XS_VERSIONS_H_INCLUDED__
+
+/*
+ * For simplicity, versions are encoded as hex integers:
+ *
+ * e.g. version string 2.4.5 becomes 0x020405
+ *
+ * The maximum version string is 255.255.255 (0xffffff).
+ *
+ */
+
+/* Current Linux driver version */
+#define XSIGO_LINUX_DRIVER_VERSION 0x030000 /* 3.0.0 */
+
+/* The minimum xsigos version that works with above driver version */
+#define MINIMUM_XSIGOS_VERSION 0x010504 /* 1.5.4 */
+
+#endif /* __XS_VERSIONS_H_INCLUDED__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSCORE_H_
+#define _XSCORE_H_
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+
+#include <linux/version.h>
+
+#include <rdma/ib_addr.h>
+
+#include "xs_compat.h"
+
+#define XSCORE_PORT_UP 100
+#define XSCORE_PORT_DOWN 101
+
+/* Support MAX of 4 PAGES */
+#define XSCORE_MAX_RXFRAGS 4
+
+enum xscore_conn_state {
+ XSCORE_CONN_INIT = 1,
+ XSCORE_CONN_ERR,
+ XSCORE_CONN_CONNECTED,
+ XSCORE_CONN_LDISCONNECTED,
+ XSCORE_CONN_RDISCONNECTED,
+ XSCORE_DEVICE_REMOVAL,
+};
+
+struct xscore_port;
+struct xscore_desc;
+
+struct xscore_buf_info {
+ unsigned long addr;
+ void *cookie;
+ int sz;
+ int status;
+ unsigned long time_stamp;
+};
+
+struct xscore_conn_ctx {
+ /*
+ * These are public attributes which need to be set by the client.
+ * They could be moved into a separate structure and copied over here.
+ */
+
+ int tx_ring_size;
+ int rx_ring_size;
+ int rx_buf_size;
+ /* In Interrupt mode coalescing parameters */
+ u32 tx_coalesce_usecs;
+ u32 tx_max_coalesced_frames;
+ u32 rx_coalesce_usecs;
+ u32 rx_max_coalesced_frames;
+ u32 features;
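+/* Feature bits for the 'features' field above, set by the client. */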
+#define XSCORE_NO_SEND_COMPL_INTR 0x1
+#define XSCORE_SG_SUPPORT 0x2
+#define XSCORE_RDMA_SUPPORT 0x4
+#define XSCORE_NO_RECV_COMPL_INTR 0x8
+#define XSCORE_FMR_SUPPORT 0x10
+#define XSCORE_DONT_FREE_SENDBUF 0x20
+#define XSCORE_8K_IBMTU_SUPPORT 0x40
+#define XSCORE_USE_CHECKSUM (1 << 31)
+ void (*send_compl_handler)(void *client_arg, void *, int status,
+ int n);
+ void (*recv_msg_handler)(void *client_arg, void *, int sz, int status,
+ int n);
+ void (*recv_compl_handler)(void *client_arg);
+ void (*event_handler)(void *client_arg, int event);
+ u8 *(*alloc_buf)(void *client_arg, void **cookie, int sz);
+ struct page *(*alloc_page_bufs)(void *client_arg, void **cookie,
+ int *sz, int element);
+#define XSCORE_SEND_BUF 1
+#define XSCORE_RECV_BUF 2
+ void (*free_buf)(void *client_arg, void *cookie, int dir);
+ char priv_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
+ int priv_data_len;
+ void *client_arg;
+ u64 service_id;
+ union ib_gid dgid;
+ u64 dguid;
+ u16 dlid;
+ int max_fmr_pages;
+ int fmr_pool_size;
+ u8 cm_timeout;
+ u8 cm_retries;
+ /*
+ * These are private attributes
+ */
+ spinlock_t lock;
+ struct mutex mlock;
+ enum xscore_conn_state state;
+ int status;
+ struct xscore_port *port;
+ struct ib_cm_id *cm_id;
+ struct ib_sa_path_rec path_rec;
+ struct ib_cq *scq;
+ struct ib_cq *rcq;
+ struct ib_qp *qp;
+ int local_qpn;
+ int remote_qpn;
+ struct ib_sge *tx_sge;
+ struct ib_fmr_pool *fmr_pool;
+ struct xscore_desc *tx_ring;
+ struct xscore_desc *rx_ring;
+ int next_xmit;
+ struct completion done;
+ int flags;
+#define XSCORE_SYNCHRONOUS 0x1
+
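+/*
+ * Completion caches: xscore_read_buf() and xscore_poll_send() poll the CQs
+ * in batches and drain these arrays one entry per call.
+ */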
+#define XSCORE_NUM_RWC 128
+#define XSCORE_NUM_SWC 8
+
+ struct ib_wc rwc[XSCORE_NUM_RWC];
+ int total_rwc;
+ int cur_rwc;
+ struct ib_wc swc[XSCORE_NUM_SWC];
+ int total_swc;
+ int cur_swc;
+};
+
+/*
+ * This bit signals the soft HCA to defer processing when the caller runs
+ * with interrupts disabled.
+ */
+#define XSCORE_DEFER_PROCESS (1 << 31)
+
+int xscore_post_send_sg(struct xscore_conn_ctx *ctx, struct sk_buff *skb,
+ int oflags);
+int xscore_post_send(struct xscore_conn_ctx *ctx, void *addr, int len,
+ int flags);
+int xscore_enable_txintr(struct xscore_conn_ctx *ctx);
+int xscore_enable_rxintr(struct xscore_conn_ctx *ctx);
+int xscore_conn_connect(struct xscore_conn_ctx *ctx, int flags);
+void xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags);
+int xscore_conn_init(struct xscore_conn_ctx *ctx, struct xscore_port *port);
+void xscore_conn_destroy(struct xscore_conn_ctx *ctx);
+struct xscore_port *xscore_get_port(unsigned long hndl);
+int xscore_read_buf(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp);
+int xscore_poll_send(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp);
+int xscore_refill_recv(struct xscore_conn_ctx *ctx, int gfp_flags);
+u8 xscore_port_num(struct xscore_port *port);
+int xscore_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int xscore_wait_for_sessions(u8 calc_time);
+
+typedef void (*xcpm_receive_message_handler) (void *xsmp_hndl,
+ u8 *data, int length);
+typedef void (*xcpm_receive_event_handler) (void *xsmp_hndl, int event);
+typedef int (*xcpm_callout_event_handler) (char *name);
+
+enum xsmp_svc_state {
+ SVC_STATE_DOWN = 1,
+ SVC_STATE_UP,
+};
+
+struct xsmp_service_reg_info {
+ enum xsmp_svc_state svc_state;
+ xcpm_receive_message_handler receive_handler;
+ xcpm_receive_event_handler event_handler;
+ xcpm_callout_event_handler callout_handler;
+ u16 ctrl_message_type;
+ u16 resource_flag_index;
+ int flags;
+ atomic_t ref_cnt;
+};
+
+struct xsmp_session_info {
+ char chassis_name[64];
+ char session_name[64];
+ u32 version;
+ struct xscore_port *port;
+ struct ib_device *ib_device;
+ struct device *dma_device;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ u8 is_shca;
+ u64 dguid;
+};
+
+/* extern declarations */
+extern u32 xcpm_resource_flags;
+extern int boot_flag;
+extern struct list_head xscore_port_list;
+extern int shca_csum;
+extern int xsigod_enable;
+extern char *os_version;
+extern char *os_arch;
+extern char hostname_str[];
+extern char system_id_str[];
+extern int xscore_create_procfs_entries(void);
+extern void xscore_remove_procfs_entries(void);
+extern void xcpm_port_add_proc_entry(struct xscore_port *port);
+extern void xcpm_port_remove_proc_entry(struct xscore_port *port);
+extern void xsmp_ulp_notify(struct xscore_port *p, int e);
+extern int xscore_wait_for_sessions(u8 calc_time);
+
+/*
+ * All XCPM service message functions
+ */
+
+int xsmp_sessions_match(struct xsmp_session_info *, void*);
+
+int xcpm_register_service(struct xsmp_service_reg_info *s_info);
+
+int xcpm_unregister_service(int service_id);
+
+int xcpm_send_message(void *xsmp_hndl, int service_id,
+ u8 *data, int length);
+
+int xcpm_get_xsmp_session_info(void *xsmp_hndl,
+ struct xsmp_session_info *ip);
+
+int xcpm_check_duplicate_names(void *xsmp_hndl, char *name, u8 svc_id);
+int xcpm_send_msg_xsigod(void *xsmp_hndl, void *msg, int len);
+
+void *xcpm_alloc_msg(int sz);
+
+void xcpm_free_msg(void *msg);
+
+int xcpm_is_xsigod_enabled(void);
+
+#endif /* _XSCORE_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements XSCORE API used by client drivers
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/jiffies.h>
+
+#include "xscore.h"
+#include "xscore_priv.h"
+#include "xsmp.h"
+
+/*
+ * For now, to make the driver use checksum instead of iCRC, set the
+ * following module parameters:
+ * # modprobe ib_xgc icrc_rx=1 icrc_tx=1
+ * # modprobe xscore shca_csum=1
+ * You'll need to do this on the chassis's shca too
+ */
+int shca_csum = 1;
+module_param(shca_csum, int, 0644);
+MODULE_PARM_DESC(shca_csum,
+"Set value to 1 to default the shca to use checksum instead of icrc32");
+
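+/* Layout of the CM private data exchanged when a connection is set up. */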
+struct xt_cm_private_data {
+ u64 vid;
+ u16 qp_type;
+ u16 max_ctrl_msg_size;
+ u32 data_qp_type;
+} __packed;
+
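+/*
+ * Per-slot descriptor for the TX and RX rings: tracks the buffer, its DMA
+ * mapping(s) and a timestamp for the posted work request.
+ */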
+struct xscore_desc {
+ dma_addr_t mapping;
+ dma_addr_t rxmapping[XSCORE_MAX_RXFRAGS];
+ void *vaddr;
+ size_t size;
+ dma_addr_t *sg_mapping;
+ struct sk_buff *skb;
+ struct page *page;
+ int flags;
+ unsigned long time_stamp;
+ enum dma_data_direction direction;
+};
+
+static int xscore_eth_mtu = IB_MTU_4096;
+module_param(xscore_eth_mtu, int, 0644);
+
+static int xscore_ib_mtu = IB_MTU_2048;
+module_param(xscore_ib_mtu, int, 0644);
+
+static int qp_retry_count = 6;
+module_param(qp_retry_count, int, 0644);
+
+static int qp_timeout = 16;
+module_param(qp_timeout, int, 0644);
+
+static int rdma_responder_resources = 16;
+
+module_param(rdma_responder_resources, int, 0644);
+
+static int xscore_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void _xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags);
+
+static void xscore_qp_event(struct ib_event *event, void *context)
+{
+ pr_err("QP event %d\n", event->event);
+}
+
+static void xscore_reset_rxdescriptor(struct xscore_desc *desc)
+{
+ desc->vaddr = 0;
+ desc->page = 0;
+ desc->skb = 0;
+ desc->sg_mapping = 0;
+}
+
+static int xscore_new_cm_id(struct xscore_conn_ctx *ctx)
+{
+ struct ib_cm_id *new_cm_id;
+
+ new_cm_id = ib_create_cm_id(ctx->port->xs_dev->device,
+ xscore_cm_handler, ctx);
+ if (IS_ERR(new_cm_id))
+ return PTR_ERR(new_cm_id);
+
+ if (ctx->cm_id)
+ ib_destroy_cm_id(ctx->cm_id);
+ ctx->cm_id = new_cm_id;
+
+ return 0;
+}
+
+static int xs_dma_map_tx(struct xscore_conn_ctx *ctx,
+ struct xscore_desc *desc, int *nfrags)
+{
+ struct xscore_port *port = ctx->port;
+ struct ib_device *ca = port->xs_dev->device;
+ struct sk_buff *skb = desc->skb;
+ dma_addr_t *mapping = desc->sg_mapping;
+ int i;
+ int off;
+ struct ib_sge *tx_sge = ctx->tx_sge;
+
+ if (skb_headlen(skb)) {
+ mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+ return -EIO;
+ ib_dma_sync_single_for_device(ca, mapping[0],
+ skb_headlen(skb), DMA_TO_DEVICE);
+
+ off = 1;
+ tx_sge[0].addr = mapping[0];
+ tx_sge[0].length = skb_headlen(skb);
+ tx_sge[0].lkey = port->xs_dev->mr->lkey;
+ } else
+ off = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ mapping[i + off] = ib_dma_map_page(ca, skb_frag_page(frag),
+ frag->page_offset,
+ skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
+ goto partial_error;
+ ib_dma_sync_single_for_device(ca, mapping[i + off],
+ frag->size, DMA_TO_DEVICE);
+ tx_sge[i + off].addr = mapping[i + off];
+ tx_sge[i + off].length = frag->size;
+ tx_sge[i + off].lkey = port->xs_dev->mr->lkey;
+ }
+ *nfrags = skb_shinfo(skb)->nr_frags + off;
+ return 0;
+
+partial_error:
+ for (; i > 0; --i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ }
+
+ if (off)
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+ return -EIO;
+}
+
+static void xs_dma_unmap_tx(struct ib_device *ca, struct xscore_desc *desc)
+{
+ struct sk_buff *skb = desc->skb;
+ dma_addr_t *mapping = desc->sg_mapping;
+ int i;
+ int off;
+
+ if (skb_headlen(skb)) {
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+ DMA_TO_DEVICE);
+ off = 1;
+ } else
+ off = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag),
+ DMA_TO_DEVICE);
+ }
+}
+
+int xscore_post_send_sg(struct xscore_conn_ctx *ctx, struct sk_buff *skb,
+ int oflags)
+{
+ struct ib_send_wr wr, *bad_wr;
+ int ret;
+ int nfrags = 0;
+ struct xscore_desc *desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ if (ctx->state != XSCORE_CONN_CONNECTED) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ desc = &ctx->tx_ring[ctx->next_xmit];
+ if (desc->skb) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ wr.next = NULL;
+ wr.wr_id = ctx->next_xmit;
+ wr.sg_list = ctx->tx_sge;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ desc->skb = skb;
+ /*
+ * perform DMA mapping of the SKB
+ */
+ ret = xs_dma_map_tx(ctx, desc, &nfrags);
+ if (unlikely(ret)) {
+ desc->skb = 0;
+ goto out;
+ }
+
+ ctx->next_xmit = (ctx->next_xmit + 1) % ctx->tx_ring_size;
+
+ wr.num_sge = nfrags;
+
+ if (oflags & XSCORE_DEFER_PROCESS)
+ wr.send_flags |= XSCORE_DEFER_PROCESS;
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ /* Note the Time stamp */
+ desc->time_stamp = jiffies;
+
+ ret = ib_post_send(ctx->qp, &wr, &bad_wr);
+
+ if (ret) {
+ xs_dma_unmap_tx(ctx->port->xs_dev->device, desc);
+ desc->skb = 0;
+ }
+
+ IB_INFO("%s: ret %d, nxmit: %d, nfrags: %d\n", __func__,
+ ret, ctx->next_xmit, nfrags);
+ return ret;
+out:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(xscore_post_send_sg);
+
+int xscore_post_send(struct xscore_conn_ctx *ctx, void *addr, int len,
+ int oflags)
+{
+ struct xscore_port *port = ctx->port;
+ struct ib_device *ca = port->xs_dev->device;
+ dma_addr_t mapping;
+ struct ib_sge list;
+ struct ib_send_wr wr, *bad_wr;
+ int ret = 0;
+ struct xscore_desc *desc;
+ unsigned long flags;
+
+ IB_INFO("%s: Addr: %p, Len: %d, DGUID: 0x%llx\n", __func__, addr,
+ len, ctx->dguid);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ if (ctx->state != XSCORE_CONN_CONNECTED) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ desc = &ctx->tx_ring[ctx->next_xmit];
+ if (desc->vaddr) {
+ ret = -ENOBUFS;
+ goto out;
+ }
+
+ mapping = ib_dma_map_single(ca, addr, len, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+ ret = -EIO;
+ goto out;
+ }
+
+ list.addr = mapping;
+ list.length = len;
+ list.lkey = port->xs_dev->mr->lkey;
+
+ wr.next = NULL;
+ wr.wr_id = ctx->next_xmit;
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+ wr.opcode = IB_WR_SEND;
+ wr.send_flags = IB_SEND_SIGNALED;
+
+ ctx->next_xmit = (ctx->next_xmit + 1) % ctx->tx_ring_size;
+
+ if (oflags & XSCORE_DEFER_PROCESS)
+ wr.send_flags |= XSCORE_DEFER_PROCESS;
+
+ ib_dma_sync_single_for_device(ca, mapping, len, DMA_TO_DEVICE);
+
+ desc->vaddr = addr;
+ desc->mapping = mapping;
+ desc->size = len;
+ desc->skb = 0;
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ ret = ib_post_send(ctx->qp, &wr, &bad_wr);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ if (ret) {
+ ib_dma_unmap_single(ca, mapping, len, DMA_TO_DEVICE);
+ desc->vaddr = 0;
+ desc->mapping = 0;
+ }
+out:
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ IB_INFO("%s: ret %d, nxmit: %d\n", __func__, ret, ctx->next_xmit);
+
+ return ret;
+}
+EXPORT_SYMBOL(xscore_post_send);
+
+static int xs_post_recv(struct xscore_conn_ctx *ctx, int offset, int n,
+ int gfp_flags, int fillholes)
+{
+ struct xscore_port *port = ctx->port;
+ struct ib_device *ca = port->xs_dev->device;
+ struct ib_sge list[XSCORE_MAX_RXFRAGS];
+ struct ib_recv_wr wr;
+ struct ib_recv_wr *bad_wr;
+ int i, j, ret = 0;
+ dma_addr_t *mapping;
+ int rsize = ctx->rx_buf_size;
+
+ for (i = 0; i < n; ++i, ++offset) {
+ struct xscore_desc *desc = &ctx->rx_ring[offset];
+ void *addr = NULL;
+
+ j = 1;
+
+ if (fillholes && (desc->vaddr || desc->page || desc->skb))
+ continue;
+
+ xscore_reset_rxdescriptor(desc);
+
+ mapping = desc->rxmapping;
+
+ if (ctx->alloc_page_bufs) {
+ desc->page =
+ ctx->alloc_page_bufs(ctx->client_arg,
+ (void **)&desc->page, &rsize,
+ i);
+ if (!desc->page)
+ ret = -ENOMEM;
+ } else if (ctx->alloc_buf) {
+ addr =
+ ctx->alloc_buf(ctx->client_arg, (void **)&desc->skb,
+ rsize);
+ if (!addr)
+ ret = -ENOMEM;
+ } else {
+ addr = kmalloc(rsize, gfp_flags);
+ if (!addr)
+ ret = -ENOMEM;
+ }
+
+ if (ret == -ENOMEM) {
+ if (fillholes)
+ return ret;
+ goto partial_failure;
+ }
+
+ desc->size = rsize;
+ /*
+ * Map the buffer and give the bus address
+ */
+ if (addr) {
+ desc->vaddr = addr;
+ mapping[0] = ib_dma_map_single(ca, addr, rsize,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[0]))) {
+ ret = -EIO;
+ if (fillholes)
+ return ret;
+ goto partial_failure;
+ }
+ list[0].addr = mapping[0];
+ list[0].length = rsize;
+ list[0].lkey = port->xs_dev->mr->lkey;
+ } else {
+ for (j = 0; j < (rsize / PAGE_SIZE); ++j) {
+ /*
+ * ESX doesn't allow to reference page
+ * descriptor in any form of pointer
+ * arithmetic
+ */
+ mapping[j] =
+ ib_dma_map_page(ca, (desc->page + j), 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely
+ (ib_dma_mapping_error(ca, mapping[j]))) {
+ ret = -EIO;
+ for (; j > 0; --j)
+ ib_dma_unmap_page(ca,
+ mapping[j - 1],
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (fillholes)
+ return ret;
+ goto partial_failure;
+ }
+ list[j].addr = mapping[j];
+ list[j].length = PAGE_SIZE;
+ list[j].lkey = port->xs_dev->mr->lkey;
+ }
+ }
+
+ desc->sg_mapping = mapping;
+ wr.next = NULL;
+ wr.wr_id = (int)offset;
+ wr.sg_list = list;
+ wr.num_sge = j;
+ ret = ib_post_recv(ctx->qp, &wr, &bad_wr);
+ if (ret) {
+ pr_err("xs_post_recv: ib_post_recv error,");
+ pr_err("i = %d, ret = %d\n", i, ret);
+ if (fillholes)
+ return ret;
+ goto partial_failure;
+ }
+ }
+ return 0;
+partial_failure:
+ pr_err("%s: Failed to allocate buffers\n", __func__);
+ for (; i >= 0; i--, offset--) {
+ struct xscore_desc *desc = &ctx->rx_ring[offset];
+
+ if (desc->sg_mapping) {
+ if (desc->page) {
+ for (j = 0; j < (rsize / PAGE_SIZE); ++j)
+ ib_dma_unmap_page(ca,
+ desc->sg_mapping[j],
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ } else {
+ ib_dma_unmap_single(ca, desc->sg_mapping[0],
+ rsize, DMA_FROM_DEVICE);
+ }
+ desc->sg_mapping = 0;
+ }
+ if (desc->page || desc->vaddr || desc->skb) {
+ if (ctx->free_buf)
+ ctx->free_buf(ctx->client_arg,
+ desc->page ? desc->page :
+ (desc->skb ? desc->skb : desc->vaddr),
+ XSCORE_RECV_BUF);
+ else
+ kfree(desc->vaddr);
+
+ xscore_reset_rxdescriptor(desc);
+ }
+ }
+ return ret;
+}
+
+int xscore_refill_recv(struct xscore_conn_ctx *ctx, int gfp_flags)
+{
+ return xs_post_recv(ctx, 0, ctx->rx_ring_size, gfp_flags, 1);
+}
+EXPORT_SYMBOL(xscore_refill_recv);
+
+int xscore_enable_txintr(struct xscore_conn_ctx *ctx)
+{
+ return ib_req_notify_cq(ctx->scq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+}
+EXPORT_SYMBOL(xscore_enable_txintr);
+
+int xscore_enable_rxintr(struct xscore_conn_ctx *ctx)
+{
+ return ib_req_notify_cq(ctx->rcq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+}
+EXPORT_SYMBOL(xscore_enable_rxintr);
+
+static int _xscore_poll_send(struct xscore_conn_ctx *ctx)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc wc;
+ struct xscore_desc *desc;
+ int i;
+ int err = 0;
+ int ret;
+
+ IB_INFO("%s: Completion GUID: 0x%llx\n", __func__, ctx->dguid);
+
+ while ((ret = ib_poll_cq(ctx->scq, 1, &wc)) > 0) {
+ i = (int)wc.wr_id;
+ if (i >= ctx->tx_ring_size) {
+ IB_ERROR("%s send completion error wr_id %d > %d\n",
+ __func__, i, ctx->tx_ring_size);
+ err++;
+ break;
+ }
+ desc = &ctx->tx_ring[i];
+ if (desc->skb)
+ xs_dma_unmap_tx(ca, desc);
+ else
+ ib_dma_unmap_single(ca, desc->mapping, desc->size,
+ DMA_TO_DEVICE);
+
+ if (ctx->send_compl_handler)
+ ctx->send_compl_handler(ctx->client_arg, desc->vaddr,
+ wc.status, i);
+ else if (ctx->free_buf)
+ ctx->free_buf(ctx->client_arg,
+ desc->skb ? desc->skb : desc->vaddr,
+ XSCORE_SEND_BUF);
+ else if ((ctx->features & XSCORE_DONT_FREE_SENDBUF) == 0)
+ kfree(desc->vaddr);
+
+ desc->mapping = 0;
+ desc->skb = 0;
+ desc->vaddr = 0;
+ if (wc.status) {
+ err++;
+ break;
+ }
+ }
+ if (!ret && !err)
+ return 0;
+ if (err)
+ return wc.status;
+ return ret;
+}
+
+static void xscore_send_completion(struct ib_cq *cq, void *ctx_ptr)
+{
+ struct xscore_conn_ctx *ctx = ctx_ptr;
+ int err;
+again:
+ err = _xscore_poll_send(ctx);
+ if (!err &&
+ ib_req_notify_cq(ctx->scq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0)
+ goto again;
+}
+
+int xscore_poll_send(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc *wcp;
+ struct xscore_desc *desc;
+ int i;
+ int ret;
+
+ bp->status = 0;
+
+ /*
+ * Cache it here so that we do not go to IB stack every time
+ */
+ if (!ctx->total_swc) {
+ ret = ib_poll_cq(ctx->scq, XSCORE_NUM_SWC, &ctx->swc[0]);
+ if (ret > 0) {
+ ctx->total_swc = ret;
+ ctx->cur_swc = 0;
+ } else
+ return ret;
+ }
+
+ ctx->total_swc--;
+ wcp = &ctx->swc[ctx->cur_swc++];
+ i = (int)wcp->wr_id;
+ if (i >= ctx->tx_ring_size) {
+ IB_ERROR("%s Send completion error wrid %d (> %d)\n",
+ __func__, i, ctx->tx_ring_size);
+ return 0;
+ }
+ desc = &ctx->tx_ring[i];
+ if (desc->skb)
+ xs_dma_unmap_tx(ca, desc);
+ else
+ ib_dma_unmap_single(ca, desc->mapping, desc->size,
+ DMA_TO_DEVICE);
+ bp->addr = (unsigned long)desc->vaddr;
+ bp->sz = wcp->byte_len;
+ bp->cookie = desc->skb;
+ bp->time_stamp = desc->time_stamp;
+ desc->vaddr = 0;
+ desc->skb = 0;
+ desc->mapping = 0;
+ bp->status = wcp->status;
+ return 1;
+}
+EXPORT_SYMBOL(xscore_poll_send);
+
+int xscore_read_buf(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc *wcp;
+ struct xscore_desc *desc;
+ int i, j;
+ int ret;
+
+ bp->status = 0;
+
+ /*
+ * Cache it here so that we do not go to IB stack every time
+ */
+ if (!ctx->total_rwc) {
+ ret = ib_poll_cq(ctx->rcq, XSCORE_NUM_RWC, &ctx->rwc[0]);
+ if (ret > 0) {
+ ctx->total_rwc = ret;
+ ctx->cur_rwc = 0;
+ } else
+ return ret;
+ }
+ ret = 1;
+
+ ctx->total_rwc--;
+ wcp = &ctx->rwc[ctx->cur_rwc++];
+ i = (int)wcp->wr_id;
+ if (i >= ctx->rx_ring_size) {
+ IB_ERROR("%s completion event error with wrid %d (> %d)\n",
+ __func__, i, ctx->rx_ring_size);
+ return 0;
+ }
+ desc = &ctx->rx_ring[i];
+ if (desc->page) {
+ for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+ ib_dma_unmap_page(ca, desc->sg_mapping[j], PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ bp->cookie = desc->page;
+ } else if (desc->skb || desc->vaddr) {
+ ib_dma_sync_single_for_cpu(ca, desc->sg_mapping[0], desc->size,
+ DMA_FROM_DEVICE);
+ ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+ DMA_FROM_DEVICE);
+ bp->addr = (unsigned long)desc->vaddr;
+ bp->cookie = desc->skb;
+ } else {
+ ret = 0;
+ goto out;
+ }
+
+ bp->sz = wcp->byte_len;
+ bp->status = wcp->status;
+out:
+ xscore_reset_rxdescriptor(desc);
+ return ret;
+}
+EXPORT_SYMBOL(xscore_read_buf);
+
+static int xscore_poll_recv(struct xscore_conn_ctx *ctx)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc wc;
+ struct xscore_desc *desc;
+ int i, j;
+ void *vaddr;
+ int size;
+ int err = 0;
+ int ret = 0;
+
+ while ((ret = ib_poll_cq(ctx->rcq, 1, &wc)) > 0) {
+ i = (int)wc.wr_id;
+ if (i >= ctx->rx_ring_size) {
+ IB_ERROR("%s completion error with wr_id%d > size %d\n",
+ __func__, i, ctx->rx_ring_size);
+ err++;
+ break;
+ }
+ desc = &ctx->rx_ring[i];
+ if (desc->page) {
+ for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+ ib_dma_unmap_page(ca, desc->sg_mapping[j],
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ } else if (desc->skb || desc->vaddr) {
+ ib_dma_sync_single_for_cpu(ca, desc->sg_mapping[0],
+ desc->size, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+ DMA_FROM_DEVICE);
+ }
+ /*
+ * Post new buffer back
+ */
+ vaddr = desc->vaddr;
+ size = wc.byte_len;
+
+ xscore_reset_rxdescriptor(desc);
+
+ /*
+ * Call completion callback, pass buffer size
+ * and client arg and status
+ */
+ if (ctx->recv_msg_handler)
+ ctx->recv_msg_handler(ctx->client_arg, vaddr, size,
+ wc.status, i);
+ /*
+ * If there is any error do not post anymore buffers
+ */
+ if (wc.status) {
+ err++;
+ break;
+ }
+ ctx->status = xs_post_recv(ctx, i, 1, GFP_ATOMIC, 0);
+ }
+ if (!ret && !err)
+ return 0;
+ if (err)
+ return wc.status;
+ return ret;
+}
+
+static void xscore_recv_completion(struct ib_cq *cq, void *ctx_ptr)
+{
+ struct xscore_conn_ctx *ctx = ctx_ptr;
+ int err;
+
+ if (ctx->recv_compl_handler) {
+ ctx->recv_compl_handler(ctx->client_arg);
+ return;
+ }
+again:
+ err = xscore_poll_recv(ctx);
+ if (!err &&
+ ib_req_notify_cq(ctx->rcq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS) > 0)
+ goto again;
+}
+
+void xscore_conn_destroy(struct xscore_conn_ctx *ctx)
+{
+ int i;
+
+ mutex_lock(&ctx->mlock);
+ if (ctx->cm_id && !IS_ERR(ctx->cm_id))
+ ib_destroy_cm_id(ctx->cm_id);
+ if (ctx->qp && !IS_ERR(ctx->qp))
+ ib_destroy_qp(ctx->qp);
+ ctx->qp = 0;
+ /*
+ * Flush all recv and send completions
+ */
+ if (ctx->rcq && !IS_ERR(ctx->rcq)) {
+ if (ctx->recv_compl_handler)
+ ctx->recv_compl_handler(ctx->client_arg);
+ else
+ (void)xscore_poll_recv(ctx);
+ ib_destroy_cq(ctx->rcq);
+ }
+ ctx->rcq = 0;
+ if (ctx->scq && !IS_ERR(ctx->scq)) {
+ (void)_xscore_poll_send(ctx);
+ ib_destroy_cq(ctx->scq);
+ }
+ ctx->scq = 0;
+ if (ctx->tx_sge != NULL)
+ kfree(ctx->tx_sge);
+ ctx->tx_sge = 0;
+ if (ctx->tx_ring) {
+ for (i = 0; i < ctx->tx_ring_size; i++) {
+ struct xscore_desc *desc = &ctx->tx_ring[i];
+
+ if (desc->sg_mapping != NULL)
+ kfree(desc->sg_mapping);
+ desc->sg_mapping = 0;
+ }
+ vfree(ctx->tx_ring);
+ }
+ ctx->tx_ring = 0;
+ if (ctx->rx_ring)
+ vfree(ctx->rx_ring);
+ ctx->rx_ring = 0;
+ if (ctx->fmr_pool && !IS_ERR(ctx->fmr_pool))
+ ib_destroy_fmr_pool(ctx->fmr_pool);
+ ctx->fmr_pool = 0;
+ mutex_unlock(&ctx->mlock);
+ mutex_destroy(&ctx->mlock);
+}
+EXPORT_SYMBOL(xscore_conn_destroy);
+
+static int xscore_create_qpset(struct xscore_conn_ctx *ctx)
+{
+ struct ib_qp_init_attr init_attr;
+ int ret = 0, max_sge;
+
+ if (ctx->qp && !IS_ERR(ctx->qp))
+ ib_destroy_qp(ctx->qp);
+
+ memset(&init_attr, 0, sizeof(init_attr));
+ init_attr.event_handler = xscore_qp_event;
+ init_attr.cap.max_send_wr = ctx->tx_ring_size;
+ init_attr.cap.max_recv_wr = ctx->rx_ring_size;
+ init_attr.cap.max_recv_sge = XSCORE_MAX_RXFRAGS;
+
+ max_sge = ctx->port->xs_dev->dev_attr.max_sge;
+ if (max_sge >= (MAX_SKB_FRAGS + 1))
+ max_sge = MAX_SKB_FRAGS + 1;
+
+ if (ctx->features & XSCORE_SG_SUPPORT)
+ init_attr.cap.max_send_sge = max_sge;
+ else
+ init_attr.cap.max_send_sge = 1;
+ init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+ init_attr.qp_type = IB_QPT_RC;
+ init_attr.send_cq = ctx->scq;
+ init_attr.recv_cq = ctx->rcq;
+
+ ctx->qp = ib_create_qp(ctx->port->xs_dev->pd, &init_attr);
+ if (IS_ERR(ctx->qp)) {
+ ret = PTR_ERR(ctx->qp);
+ IB_ERROR("%s ib_create_qp failed %d\n", __func__, ret);
+ }
+ if ((ctx->features & XSCORE_NO_SEND_COMPL_INTR) == 0)
+ ib_req_notify_cq(ctx->scq, IB_CQ_NEXT_COMP);
+ if ((ctx->features & XSCORE_NO_RECV_COMPL_INTR) == 0)
+ ib_req_notify_cq(ctx->rcq, IB_CQ_NEXT_COMP);
+ return ret;
+}
+
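+/*
+ * Create the FMR pool used for RDMA transfers; page_shift of 12 means
+ * registrations are done in 4 KB pages.
+ */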
+static int create_fmr_pool(struct xscore_conn_ctx *ctx)
+{
+ struct xscore_port *port = ctx->port;
+
+ struct ib_fmr_pool_param pool_params = {
+ .max_pages_per_fmr = ctx->max_fmr_pages,
+ .access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
+ .pool_size = ctx->fmr_pool_size,
+ .dirty_watermark = 32,
+ .page_shift = 12,
+ .flush_function = 0,
+ .flush_arg = 0,
+ .cache = 1
+ };
+
+ ctx->fmr_pool = ib_create_fmr_pool(port->xs_dev->pd, &pool_params);
+ if (IS_ERR(ctx->fmr_pool))
+ return PTR_ERR(ctx->fmr_pool);
+ return 0;
+}
+
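+/*
+ * Build the destination GID: on IB, combine the local subnet prefix with
+ * the destination GUID; on Ethernet (RoCE), derive a link-local GID from
+ * the MAC embedded in the GUID (open-coded iboe_mac_vlan_to_ll()).
+ */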
+static void xscore_init_dest(struct xscore_conn_ctx *ctx)
+{
+ struct xscore_port *port = ctx->port;
+
+ if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ ctx->dgid.global.subnet_prefix =
+ port->sgid.global.subnet_prefix;
+ ctx->dgid.global.interface_id = cpu_to_be64(ctx->dguid);
+ } else {
+ /*
+ * iboe_mac_vlan_to_ll() is not available on
+ * 3.10.0-123.el7.x86_64, so its logic is open-coded here,
+ * equivalent to iboe_mac_vlan_to_ll(&ctx->dgid, dmac, 0).
+ * This driver normally uses only IB_LINK_LAYER_INFINIBAND
+ * ports.
+ */
+ u64 mac = ctx->dguid;
+ u8 dmac[6];
+ u16 vid = 0;
+ int i;
+ union ib_gid *gid = &ctx->dgid;
+
+ for (i = 0; i < 6; i++) {
+ dmac[5 - i] = mac & 0xFF;
+ mac >>= 8;
+ }
+ memset(gid->raw, 0, 16);
+ *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000);
+ if (vid < 0x1000) {
+ gid->raw[12] = vid & 0xff;
+ gid->raw[11] = vid >> 8;
+ } else {
+ gid->raw[12] = 0xfe;
+ gid->raw[11] = 0xff;
+ }
+ memcpy(gid->raw + 13, dmac + 3, 3);
+ memcpy(gid->raw + 8, dmac, 3);
+ gid->raw[8] ^= 2;
+
+ }
+}
+
+int xscore_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+ return ib_modify_cq(cq, cq_count, cq_period);
+}
+EXPORT_SYMBOL(xscore_modify_cq);
+
+int xscore_conn_init(struct xscore_conn_ctx *ctx, struct xscore_port *port)
+{
+ int i;
+ int ret = 0;
+
+ ctx->cm_id = 0;
+ ctx->port = port;
+ ctx->next_xmit = 0;
+ ctx->fmr_pool = 0;
+ ctx->total_rwc = 0;
+ ctx->cur_rwc = 0;
+ ctx->total_swc = 0;
+ ctx->cur_swc = 0;
+ spin_lock_init(&ctx->lock);
+ mutex_init(&ctx->mlock);
+ init_completion(&ctx->done);
+
+ xscore_init_dest(ctx);
+ /*
+ * Allocate descriptors
+ */
+ ctx->tx_ring = vzalloc(ctx->tx_ring_size * sizeof(struct xscore_desc));
+ if (!ctx->tx_ring)
+ return -ENOMEM;
+
+ ctx->rx_ring = vzalloc(ctx->rx_ring_size * sizeof(struct xscore_desc));
+ if (!ctx->rx_ring) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ctx->scq = ib_create_cq(ctx->port->xs_dev->device,
+ xscore_send_completion, NULL, ctx,
+ ctx->tx_ring_size, 0);
+ if (IS_ERR(ctx->scq)) {
+ ret = PTR_ERR(ctx->scq);
+ IB_ERROR("%s ib_create_cq scq failed %d\n", __func__, ret);
+ goto err;
+ }
+
+ ctx->rcq = ib_create_cq(ctx->port->xs_dev->device,
+ xscore_recv_completion, NULL, ctx,
+ ctx->rx_ring_size, 0);
+ if (IS_ERR(ctx->rcq)) {
+ ret = PTR_ERR(ctx->rcq);
+ IB_ERROR("%s ib_create_cq scq failed %d\n", __func__, ret);
+ goto err;
+ }
+
+ if ((ctx->features & XSCORE_NO_SEND_COMPL_INTR) == 0) {
+ ib_req_notify_cq(ctx->scq, IB_CQ_NEXT_COMP);
+ if (!ctx->tx_max_coalesced_frames || !ctx->tx_coalesce_usecs)
+ xscore_modify_cq(ctx->scq, ctx->tx_max_coalesced_frames,
+ ctx->tx_coalesce_usecs);
+ }
+
+ if ((ctx->features & XSCORE_NO_RECV_COMPL_INTR) == 0) {
+ ib_req_notify_cq(ctx->rcq, IB_CQ_NEXT_COMP);
+ if (!ctx->rx_max_coalesced_frames || !ctx->rx_coalesce_usecs)
+ xscore_modify_cq(ctx->rcq, ctx->rx_max_coalesced_frames,
+ ctx->rx_coalesce_usecs);
+ }
+
+ if (ctx->features & XSCORE_SG_SUPPORT) {
+ ctx->tx_sge =
+ kmalloc(sizeof(struct ib_sge) * (MAX_SKB_FRAGS + 1),
+ GFP_KERNEL);
+ if (!ctx->tx_sge) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ for (i = 0; i < ctx->tx_ring_size; i++) {
+ struct xscore_desc *desc = &ctx->tx_ring[i];
+
+ desc->sg_mapping =
+ kmalloc(sizeof(dma_addr_t) * (MAX_SKB_FRAGS + 1),
+ GFP_KERNEL);
+ if (!desc->sg_mapping) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+ }
+ ret = create_fmr_pool(ctx);
+ if ((ctx->features & XSCORE_FMR_SUPPORT) && ret)
+ goto err;
+
+ return 0;
+err:
+ IB_ERROR("%s Error %d\n", __func__, ret);
+ xscore_conn_destroy(ctx);
+ return ret;
+}
+EXPORT_SYMBOL(xscore_conn_init);
+
+u8 xscore_port_num(struct xscore_port *port)
+{
+ return port->port_num;
+}
+EXPORT_SYMBOL(xscore_port_num);
+
+static void path_rec_complete(int status, struct ib_sa_path_rec *resp,
+ void *context)
+{
+ struct xscore_conn_ctx *ctx = context;
+
+ IB_INFO("%s status %d\n", __func__, status);
+
+ if (status)
+ IB_ERROR("%s: completed with error %d\n", __func__, status);
+ else
+ memcpy(&ctx->path_rec, resp, sizeof(struct ib_sa_path_rec));
+ ctx->status = status;
+ complete(&ctx->done);
+}
+
+static int use_path_rec;
+
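+/*
+ * Build and send the CM REQ. The primary path is either resolved through
+ * an SA path record query (IB ports with use_path_rec set) or filled in
+ * locally from the port attributes.
+ */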
+static int xscore_send_req(struct xscore_conn_ctx *ctx)
+{
+ struct ib_cm_req_param req;
+ struct ib_sa_path_rec path_rec;
+ struct ib_port_attr port_attr;
+ struct ib_sa_query *query;
+ u16 pkey;
+ int status;
+
+ memset(&req, 0, sizeof(req));
+
+ req.primary_path = &ctx->path_rec;
+ req.alternate_path = NULL;
+ req.service_id = ctx->service_id;
+ req.qp_num = ctx->qp->qp_num;
+ req.qp_type = ctx->qp->qp_type;
+ req.private_data = ctx->priv_data;
+ req.private_data_len = ctx->priv_data_len;
+ req.flow_control = 1;
+ req.starting_psn = 0;
+ req.peer_to_peer = 0;
+ req.initiator_depth = 1;
+
+ if (ctx->priv_data_len == sizeof(struct xt_cm_private_data)) {
+ struct xt_cm_private_data *pdata =
+ (struct xt_cm_private_data *)ctx->priv_data;
+ if (ctx->port->xs_dev->is_shca && shca_csum) {
+ ctx->features |= XSCORE_USE_CHECKSUM;
+ pdata->data_qp_type =
+ cpu_to_be32(be32_to_cpu(pdata->data_qp_type) |
+ shca_csum);
+ } else
+ ctx->features &= ~XSCORE_USE_CHECKSUM;
+ }
+
+ if (ctx->features & XSCORE_RDMA_SUPPORT)
+ req.responder_resources =
+     min(ctx->port->xs_dev->dev_attr.max_qp_rd_atom,
+	 rdma_responder_resources);
+ else
+ req.responder_resources = 1;
+
+ req.remote_cm_response_timeout = 20;
+ req.local_cm_response_timeout = 20;
+ if (ctx->cm_timeout) {
+ req.remote_cm_response_timeout = ctx->cm_timeout;
+ req.local_cm_response_timeout = ctx->cm_timeout;
+ }
+ req.retry_count = qp_retry_count;
+ req.rnr_retry_count = 7;
+ req.max_cm_retries = 1;
+
+ memset(&path_rec, 0, sizeof(path_rec));
+
+ /*
+ * Fill up path record information here
+ */
+ (void)ib_query_port(ctx->port->xs_dev->device, ctx->port->port_num,
+ &port_attr);
+ path_rec.slid = cpu_to_be16(port_attr.lid);
+ path_rec.dlid = cpu_to_be16(ctx->dlid);
+ path_rec.sgid = ctx->port->sgid;
+ path_rec.dgid = ctx->dgid;
+ ib_query_pkey(ctx->port->xs_dev->device, ctx->port->port_num, 0, &pkey);
+ path_rec.pkey = cpu_to_be16(pkey);
+ path_rec.numb_path = 1;
+
+ if (use_path_rec && ctx->port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ /*
+ * If IB get path record from SA
+ */
+ status =
+ ib_sa_path_rec_get(&xscore_sa_client,
+ ctx->port->xs_dev->device,
+ ctx->port->port_num, &path_rec,
+ IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID
+ | IB_SA_PATH_REC_PKEY |
+ IB_SA_PATH_REC_NUMB_PATH, 3000,
+ GFP_KERNEL, &path_rec_complete,
+ (void *)ctx, &query);
+
+ if (status) {
+ IB_ERROR
+ ("%s:ib_sa_path_rec_get completed with error %d\n",
+ __func__, status);
+ return status;
+ }
+
+ wait_for_completion(&ctx->done);
+ if (ctx->status) {
+ IB_ERROR
+ ("%s:wait_for_completion completed with error %d\n",
+ __func__, ctx->status);
+ return ctx->status;
+ }
+ } else {
+ req.primary_path = &path_rec;
+
+ if (ctx->port->link_layer == IB_LINK_LAYER_ETHERNET) {
+ path_rec.mtu = port_attr.active_mtu;
+ /*
+ * LLE card has an issue where it reports
+ * active MTU=4 for Jumbo and not 5
+ */
+ if (path_rec.mtu == 4)
+ path_rec.mtu = 5;
+
+ /*
+ * 8k IB MTU support is for vnics only
+ */
+ if (!(ctx->features & XSCORE_8K_IBMTU_SUPPORT)) {
+ if (path_rec.mtu > xscore_eth_mtu)
+ path_rec.mtu = xscore_eth_mtu;
+ if (xscore_eth_mtu > 5)
+ path_rec.mtu = 5;
+ }
+
+ path_rec.hop_limit = 2;
+ } else {
+ path_rec.mtu = xscore_ib_mtu;
+ path_rec.hop_limit = 0;
+ }
+ path_rec.reversible = 1;
+ path_rec.mtu_selector = 3;
+ path_rec.rate_selector = 2;
+ path_rec.rate = 3;
+ path_rec.packet_life_time_selector = 2;
+ path_rec.packet_life_time = 14;
+ }
+
+ init_completion(&ctx->done);
+ status = ib_send_cm_req(ctx->cm_id, &req);
+ if (status)
+ IB_ERROR("%s:ib_send_cm_req completed with error %d\n",
+ __func__, status);
+ return status;
+}
+
+int xscore_conn_connect(struct xscore_conn_ctx *ctx, int flags)
+{
+ int ret;
+
+ IB_FUNCTION("%s: Connecting to 0x%llx, LID: 0x%x, SID: 0x%llx\n",
+ __func__, ctx->dguid, ctx->dlid, ctx->service_id);
+
+ mutex_lock(&ctx->mlock);
+ _xscore_conn_disconnect(ctx, flags);
+ xscore_init_dest(ctx);
+ ret = xscore_create_qpset(ctx);
+ if (ret) {
+ IB_ERROR("%s xscore_create_qpset failed %d\n", __func__, ret);
+ mutex_unlock(&ctx->mlock);
+ return ret;
+ }
+ ctx->next_xmit = 0;
+
+ ret = xscore_new_cm_id(ctx);
+ if (ret) {
+ IB_ERROR("%s ib_create_cmid failed %d\n", __func__, ret);
+ ctx->cm_id = 0;
+ mutex_unlock(&ctx->mlock);
+ return ret;
+ }
+ init_completion(&ctx->done);
+ ctx->flags |= flags;
+ ret = xscore_send_req(ctx);
+ if (ret) {
+ IB_ERROR("%s xscore_send_req failed %d\n", __func__, ret);
+ mutex_unlock(&ctx->mlock);
+ return ret;
+ }
+ /*
+ * The user wants synchronous completion, wait for connection
+ * to be setup or fail
+ */
+ if (flags & XSCORE_SYNCHRONOUS)
+ wait_for_completion(&ctx->done);
+ ctx->flags &= ~flags;
+ mutex_unlock(&ctx->mlock);
+ if (flags & XSCORE_SYNCHRONOUS)
+ return ctx->status;
+ else
+ return ret;
+}
+EXPORT_SYMBOL(xscore_conn_connect);
+
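+/*
+ * Reclaim receive buffers after a disconnect: drain any completions left
+ * on the RCQ, then walk the whole RX ring unmapping and freeing buffers
+ * that are still posted.
+ */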
+static void xscore_reclaim_recv_buffers(struct xscore_conn_ctx *ctx)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc wc;
+ struct xscore_desc *desc;
+ int i, j;
+
+ while (ib_poll_cq(ctx->rcq, 1, &wc) > 0) {
+ i = (int)wc.wr_id;
+ if (i >= ctx->rx_ring_size) {
+ IB_ERROR("%s completion error with wrid %d (> %d)\n",
+ __func__, i, ctx->rx_ring_size);
+ break;
+ }
+ desc = &ctx->rx_ring[i];
+ if (!desc->page && !desc->vaddr && !desc->skb) {
+ IB_ERROR("%s: Bad RCQ completion id: %d, qpn: %d\n",
+ __func__, i, ctx->local_qpn);
+ continue;
+ }
+
+ if (desc->page) {
+ for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+ ib_dma_unmap_page(ca, desc->sg_mapping[j],
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ } else if (desc->skb || desc->vaddr) {
+ ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+ DMA_FROM_DEVICE);
+ }
+
+ if (ctx->free_buf) {
+ ctx->free_buf(ctx->client_arg,
+	       desc->page ? desc->page :
+	       (desc->skb ? desc->skb : desc->vaddr),
+	       XSCORE_RECV_BUF);
+ } else {
+ kfree(desc->vaddr);
+ }
+ xscore_reset_rxdescriptor(desc);
+
+ }
+ for (i = 0; i < ctx->rx_ring_size; ++i) {
+ desc = &ctx->rx_ring[i];
+
+ if (desc->page || desc->vaddr || desc->skb) {
+ if (desc->page) {
+ for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+ ib_dma_unmap_page(ca,
+ desc->sg_mapping[j],
+ PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ } else if (desc->skb || desc->vaddr) {
+ ib_dma_unmap_single(ca, desc->sg_mapping[0],
+ desc->size,
+ DMA_FROM_DEVICE);
+ }
+ if (ctx->free_buf) {
+ ctx->free_buf(ctx->client_arg,
+	       desc->page ? desc->page :
+	       (desc->skb ? desc->skb : desc->vaddr),
+	       XSCORE_RECV_BUF);
+ } else {
+ kfree(desc->vaddr);
+ }
+
+ xscore_reset_rxdescriptor(desc);
+ }
+ }
+}
+
+static void xscore_reclaim_send_buffers(struct xscore_conn_ctx *ctx)
+{
+ struct ib_device *ca = ctx->port->xs_dev->device;
+ struct ib_wc wc;
+ struct xscore_desc *desc;
+ int i;
+
+ while (ib_poll_cq(ctx->scq, 1, &wc) > 0) {
+ i = (int)wc.wr_id;
+ if (i >= ctx->tx_ring_size) {
+ IB_ERROR("%s Send completion error wrid %d (> %d)\n",
+ __func__, i, ctx->tx_ring_size);
+ break;
+ }
+ desc = &ctx->tx_ring[i];
+ if (desc->skb)
+ xs_dma_unmap_tx(ca, desc);
+ else if (desc->vaddr)
+ ib_dma_unmap_single(ca, desc->mapping, desc->size,
+ DMA_TO_DEVICE);
+ else {
+ IB_ERROR("%s: Bad SCQ completion id: %d, qpn: %d\n",
+ __func__, i, ctx->local_qpn);
+ continue;
+ }
+ if (ctx->free_buf)
+ ctx->free_buf(ctx->client_arg,
+ desc->skb ? desc->skb : desc->vaddr,
+ XSCORE_SEND_BUF);
+ else if ((ctx->features & XSCORE_DONT_FREE_SENDBUF) == 0)
+ kfree(desc->vaddr);
+ desc->vaddr = 0;
+ desc->skb = 0;
+ desc->mapping = 0;
+ }
+ for (i = 0; i < ctx->tx_ring_size; ++i) {
+ desc = &ctx->tx_ring[i];
+
+ if (desc->vaddr || desc->skb) {
+ if (desc->skb)
+ xs_dma_unmap_tx(ca, desc);
+ else
+ ib_dma_unmap_single(ca, desc->mapping,
+ desc->size, DMA_TO_DEVICE);
+ if (ctx->free_buf)
+ ctx->free_buf(ctx->client_arg,
+	       desc->skb ? desc->skb : desc->vaddr,
+	       XSCORE_SEND_BUF);
+ else if (!(ctx->features & XSCORE_DONT_FREE_SENDBUF))
+ kfree(desc->vaddr);
+ desc->vaddr = 0;
+ desc->skb = 0;
+ desc->mapping = 0;
+ }
+ }
+}
+
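+/*
+ * Disconnect helper, called with ctx->mlock held: reset the QP, send a
+ * DREQ (optionally waiting for its completion), destroy the CM id so no
+ * further CM callbacks can run, then reclaim all send and receive buffers.
+ */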
+static void _xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int oflags)
+{
+ struct ib_qp_attr qp_attr;
+ unsigned long flags;
+
+ IB_FUNCTION("%s: Disconnecting to 0x%llx, LID: 0x%x\n",
+ __func__, ctx->dguid, ctx->dlid);
+
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ctx->qp && !IS_ERR(ctx->qp))
+ (void)ib_modify_qp(ctx->qp, &qp_attr, IB_QP_STATE);
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ ctx->state = XSCORE_CONN_INIT;
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ init_completion(&ctx->done);
+ ctx->flags |= oflags;
+ if (ctx->cm_id && !ib_send_cm_dreq(ctx->cm_id, NULL, 0)) {
+ if (oflags & XSCORE_SYNCHRONOUS)
+ wait_for_completion(&ctx->done);
+ }
+ ctx->flags &= ~oflags;
+ /*
+ * This guarantees no CM callbacks are pending after destroy
+ */
+ if (ctx->cm_id && !IS_ERR(ctx->cm_id))
+ ib_destroy_cm_id(ctx->cm_id);
+ ctx->cm_id = 0;
+ IB_FUNCTION("%s: Disconnected to 0x%llx\n", __func__, ctx->dguid);
+ /*
+ * Reclaim all buffers back here
+ */
+
+ ctx->total_rwc = 0;
+ ctx->cur_rwc = 0;
+ ctx->total_swc = 0;
+ ctx->cur_swc = 0;
+
+ xscore_reclaim_send_buffers(ctx);
+ xscore_reclaim_recv_buffers(ctx);
+}
+
+void xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags)
+{
+ mutex_lock(&ctx->mlock);
+ _xscore_conn_disconnect(ctx, flags);
+ mutex_unlock(&ctx->mlock);
+}
+EXPORT_SYMBOL(xscore_conn_disconnect);
+
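+/*
+ * Handle a CM REP: move the QP through INIT -> RTR using the attributes
+ * from ib_cm_init_qp_attr(), post the full receive ring, transition to
+ * RTS and complete the handshake with an RTU.
+ */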
+static void handle_cm_rep(struct xscore_conn_ctx *ctx)
+{
+ struct ib_qp_attr qp_attr;
+ int attr_mask = 0;
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.qp_state = IB_QPS_INIT;
+ ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+ if (ctx->status) {
+ IB_ERROR("ib_cm_init_qp_attr: QP to INIT\n");
+ return;
+ }
+ if (ctx->features & XSCORE_USE_CHECKSUM)
+ attr_mask |= XSCORE_USE_CHECKSUM;
+ ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+ if (ctx->status) {
+ IB_ERROR("ib: QP to INIT error\n");
+ return;
+ }
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.qp_state = IB_QPS_RTR;
+ attr_mask = 0;
+ ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+ if (ctx->status) {
+ IB_ERROR("ib_cm_init_qp_attr: QP to RTR, status=%d\n",
+ ctx->status);
+ return;
+ }
+
+ ctx->remote_qpn = qp_attr.dest_qp_num;
+ ctx->local_qpn = ctx->qp->qp_num;
+
+ if (ctx->features & XSCORE_RDMA_SUPPORT) {
+ attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC;
+ qp_attr.max_dest_rd_atomic =
+     min(ctx->port->xs_dev->dev_attr.max_qp_rd_atom,
+	 rdma_responder_resources);
+ } else {
+ qp_attr.max_dest_rd_atomic = 4;
+ }
+
+ attr_mask |= IB_QP_MIN_RNR_TIMER;
+ qp_attr.min_rnr_timer = IB_RNR_TIMER_000_16;
+ /*
+ * Handle some attributes for LLE
+ */
+ if (ctx->port->link_layer == IB_LINK_LAYER_ETHERNET) {
+ attr_mask |= IB_QP_RQ_PSN;
+ qp_attr.rq_psn = 0;
+ attr_mask |= IB_QP_AV;
+ qp_attr.ah_attr.grh.dgid = ctx->dgid;
+ qp_attr.ah_attr.sl = 0;
+ qp_attr.ah_attr.port_num = ctx->port->port_num;
+ qp_attr.ah_attr.grh.hop_limit = 1;
+ }
+
+ ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+ if (ctx->status) {
+ IB_ERROR("ib_cm_modify_qp: QP to RTR error, status=%d\n",
+ ctx->status);
+ return;
+ }
+ ctx->status = xs_post_recv(ctx, 0, ctx->rx_ring_size, GFP_KERNEL, 0);
+ if (ctx->status) {
+ IB_ERROR("ib: xs_post_recv error\n");
+ return;
+ }
+
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ attr_mask = 0;
+ qp_attr.qp_state = IB_QPS_RTS;
+ ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+ if (ctx->status)
+ return;
+ attr_mask |= IB_QP_TIMEOUT;
+ qp_attr.timeout = qp_timeout;
+ ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+ if (ctx->status) {
+ IB_ERROR("ib: QP to RTS error\n");
+ return;
+ }
+ ctx->status = ib_send_cm_rtu(ctx->cm_id, NULL, 0);
+ if (ctx->status) {
+ IB_ERROR("ib: ib_send_cm_rtu error\n");
+ return;
+ }
+}
+
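+/*
+ * CM event dispatcher: updates the connection state, completes ctx->done
+ * for callers waiting synchronously, and forwards state changes to the
+ * client's event handler.
+ */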
+static int xscore_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct xscore_conn_ctx *ctx = cm_id->context;
+ int comp = 0;
+ struct ib_qp_attr qp_attr;
+ int cback = 1;
+
+ switch (event->event) {
+ case IB_CM_REQ_ERROR:
+ IB_INFO("%s IB_CM_REQ_ERROR DGUID 0x%llx\n", __func__,
+ ctx->dguid);
+ ctx->state = XSCORE_CONN_ERR;
+ ctx->status = -ECONNRESET;
+ comp = 1;
+ break;
+ case IB_CM_REP_RECEIVED:
+ IB_INFO("%s IB_CM_REP_RCVD DGUID 0x%llx\n", __func__,
+ ctx->dguid);
+ comp = 1;
+ /*
+ * Now handle CM rep from remote end
+ */
+ handle_cm_rep(ctx);
+ if (ctx->status)
+ ctx->state = XSCORE_CONN_ERR;
+ else
+ ctx->state = XSCORE_CONN_CONNECTED;
+ break;
+ case IB_CM_REJ_RECEIVED:
+ IB_INFO("%s IB_CM_REJ_RCVD DGUID 0x%llx", __func__, ctx->dguid);
+ IB_INFO(",reason: %d, ", event->param.rej_rcvd.reason);
+ IB_INFO("SID: 0x%llx\n", ctx->service_id);
+ comp = 1;
+ ctx->status = -ECONNRESET;
+ ctx->state = XSCORE_CONN_ERR;
+ break;
+ case IB_CM_DREQ_RECEIVED:
+ /*
+ * Handle this gracefully and try to re-connect
+ */
+ IB_INFO("%s IB_CM_DREQ_RCVD DGUID 0x%llx\n", __func__,
+ ctx->dguid);
+ qp_attr.qp_state = IB_QPS_RESET;
+ (void)ib_modify_qp(ctx->qp, &qp_attr, IB_QP_STATE);
+ ib_send_cm_drep(ctx->cm_id, NULL, 0);
+ comp = 1;
+ ctx->state = XSCORE_CONN_RDISCONNECTED;
+ break;
+ case IB_CM_DREP_RECEIVED:
+ IB_INFO("%s IB_CM_DREP_RCVD DGUID 0x%llx\n", __func__,
+ ctx->dguid);
+ comp = 1;
+ ctx->status = 0;
+ ctx->state = XSCORE_CONN_LDISCONNECTED;
+ break;
+ case IB_CM_DREQ_ERROR:
+ IB_INFO("%s IB_CM_DREQ_ERR DGUID 0x%llx\n", __func__,
+ ctx->dguid);
+ comp = 1;
+ ctx->status = -ECONNRESET;
+ ctx->state = XSCORE_CONN_LDISCONNECTED;
+ break;
+ case IB_CM_TIMEWAIT_EXIT:
+ cback = 0;
+ break;
+ default:
+ cback = 0;
+ break;
+ }
+ if (comp && cback && (ctx->flags & XSCORE_SYNCHRONOUS))
+ complete(&ctx->done);
+ if (ctx->event_handler && cback)
+ ctx->event_handler(ctx->client_arg, ctx->state);
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements XDS/XDDS protocol as well as XSMP protocol
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/jiffies.h>
+#include <linux/proc_fs.h>
+
+#include "xscore_priv.h"
+#include "xs_compat.h"
+#include "xs_versions.h"
+#include "xscore.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define XSCORE_VERSION "Unknown"
+#error "No Version"
+#else
+#define XSCORE_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("OVN core driver");
+MODULE_VERSION(XSCORE_VERSION);
+
+int xscore_debug;
+module_param(xscore_debug, int, 0644);
+
+int xscore_force_sm_change;
+module_param(xscore_force_sm_change, int, 0644);
+int xscore_notify_ulps;
+module_param(xscore_notify_ulps, int, 0644);
+
+char hostname_str[XSIGO_MAX_HOSTNAME + 1];
+char system_id_str[64];
+
+static char *hostname;
+module_param(hostname, charp, 0444);
+static char *system_id;
+module_param(system_id, charp, 0444);
+
+char *os_version;
+module_param(os_version, charp, 0444);
+char *os_arch;
+module_param(os_arch, charp, 0444);
+
+#if defined(INDIVIDUAL_HEAPS)
+vmk_heapid ib_basic_heapid;
+#endif
+
+struct ib_sa_client xscore_sa_client;
+struct list_head xscore_port_list;
+struct mutex xscore_port_mutex;
+
+static void xscore_add_one(struct ib_device *device);
+static void xscore_remove_one(struct ib_device *device);
+static void xds_send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc);
+static void xds_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc);
+static int xds_check_xcm_record(struct xscore_port *port,
+ struct ib_xds_mad *xds_mad);
+static void xscore_port_event_handler(struct work_struct *work);
+
+static struct ib_client xscore_client = {
+ .name = "xscore",
+ .add = xscore_add_one,
+ .remove = xscore_remove_one
+};
+
+/*
+ * Initialize XDS mad agent to send and receive XDS query
+ */
+static int xscore_init_mad_agent(struct xscore_port *port)
+{
+ struct xscore_dev *xs_dev = port->xs_dev;
+ struct ib_mad_reg_req mad_reg_req;
+
+ memset(&mad_reg_req, 0, sizeof(struct ib_mad_reg_req));
+ mad_reg_req.mgmt_class = XSIGO_MGMT_CLASS;
+ mad_reg_req.mgmt_class_version = XSIGO_MGMT_CLASS_VERSION;
+ set_bit(IB_MGMT_METHOD_GET, mad_reg_req.method_mask);
+
+ port->mad_agent = ib_register_mad_agent(xs_dev->device,
+ port->port_num, IB_QPT_GSI,
+ &mad_reg_req, 0,
+ xds_send_handler,
+ xds_recv_handler, (void *)port,
+ 0);
+
+ if (IS_ERR(port->mad_agent)) {
+ IB_ERROR("Failure registering mad-handle for ");
+ IB_ERROR("port %d,", port->port_num);
+ IB_ERROR("GUID: 0x%llx\n", port->guid);
+ return PTR_ERR(port->mad_agent);
+ }
+ return 0;
+}
+
+/*
+ * This is the callback for service record query by the IB MAD layer
+ */
+static void service_rec_callback(int status, struct ib_sa_service_rec *resp,
+ void *context)
+{
+ struct xscore_port *port = (struct xscore_port *)context;
+
+ if (!status && resp) {
+ port->xds_lid = be16_to_cpu(resp->data16[0]);
+ port->xds_guid = be64_to_cpu(resp->data64[0]);
+ } else {
+ XDS_INFO("service_rec_callback: failed code: %d,", status);
+ XDS_INFO("port %d, GUID: 0x%llx\n", port->port_num, port->guid);
+ port->counters[PORT_XDS_SA_QUERY_TOUT_COUNTER]++;
+ set_bit(XSCORE_FORCE_SM_CHANGE, &port->flags);
+ }
+ port->sa_query_status = status;
+ /*
+ * Wake up thread waiting
+ */
+ XDS_INFO("service_rec_callback: success code: %d, GUID: 0x%llx\n",
+ status, port->guid);
+ complete(&port->sa_query_done);
+}
+
+static void xdds_msg_handler(struct work_struct *work)
+{
+ struct xdds_work *xwork = container_of(work, struct xdds_work,
+ work);
+ struct xdp_hdr *msghdr = (struct xdp_hdr *)xwork->msg;
+
+ xscore_set_wq_state(XSCORE_WQ_XDDS_HANDLER);
+ switch (ntohs(msghdr->type)) {
+
+ case XDP_MSG_TYPE_DISC_SOL:
+
+ /* Unicast from chassis (xcfm info) */
+ if (ntohs(msghdr->flags) & XDP_FLAGS_RSP) {
+ struct ib_xds_mad xds_mad;
+
+ memset(&xds_mad, 0, sizeof(struct ib_xds_mad));
+ memcpy(xds_mad.data,
+ xwork->msg + sizeof(struct xdp_hdr),
+ sizeof(struct xcm_list));
+ /*
+ * Now call XCM list handling routine
+ */
+ xds_check_xcm_record(xwork->port, &xds_mad);
+ }
+ break;
+ default:
+ XDDS_ERROR("%s: Port GUID: ", __func__);
+ XDDS_ERROR("0x%llx", xwork->port->guid);
+ XDDS_ERROR("Unexpected protocol type");
+ XDDS_ERROR(" %d\n", ntohs(msghdr->type));
+ break;
+ }
+ xs_ud_free(xwork->msg);
+ kfree(xwork);
+ xscore_clear_wq_state(XSCORE_WQ_XDDS_HANDLER);
+}
+
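+/*
+ * UD receive callback: unless the port is shutting down, defer the XDDS
+ * message to the port workqueue for processing by xdds_msg_handler().
+ */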
+static void xs_ud_callback(void *arg, void *msg, int len)
+{
+ struct xscore_port *port = arg;
+ struct xdds_work *xwork;
+ unsigned long flags;
+
+ /*
+ * Grab spin lock and check for SHUTDOWN state
+ */
+ spin_lock_irqsave(&port->lock, flags);
+ if (test_bit(XSCORE_PORT_SHUTDOWN, &port->flags))
+ goto out;
+ xwork = kzalloc(sizeof(struct xdds_work), GFP_ATOMIC);
+ if (xwork) {
+ xwork->msg = (u8 *) msg;
+ xwork->msg_len = len;
+ xwork->port = port;
+ INIT_WORK(&xwork->work, xdds_msg_handler);
+ queue_work(port->port_wq, &xwork->work);
+ } else {
+out:
+ xs_ud_free(msg);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+#define XSCORE_SA_QUERY_TIMEOUT (3*1000)
+
+/*
+ * This function queries SA for XDS service record. This is synchronous
+ * and needs to be called in thread/workq context
+ */
+
+int xscore_query_svc_record(struct xscore_port *port)
+{
+ struct xscore_dev *xs_dev = port->xs_dev;
+ struct ib_sa_service_rec service_rec;
+ struct ib_sa_query *query;
+ struct ib_port_attr attr;
+ int ret;
+
+ memset(&service_rec, 0, sizeof(service_rec));
+ strcpy(service_rec.name, "XSIGOXDS");
+ init_completion(&port->sa_query_done);
+
+ if (xscore_notify_ulps || (xscore_force_sm_change &&
+ test_and_clear_bit(XSCORE_FORCE_SM_CHANGE,
+ &port->flags))) {
+ XDS_INFO("ib_sa_force_update: port %d GUID: 0x%llx\n",
+ port->port_num, port->guid);
+ attr.sm_lid = port->sm_lid;
+ attr.lid = port->lid;
+ /* mode = 1 Notify ULPs about IB events */
+ ib_sa_force_update(&xscore_sa_client,
+ xs_dev->device, &attr, port->port_num,
+ xscore_notify_ulps);
+ }
+ port->rec_poller_state = XDS_RECP_SAUPDATE_DONE;
+
+ ret = ib_sa_service_rec_query(&xscore_sa_client,
+ xs_dev->device, port->port_num,
+ IB_MGMT_METHOD_GET, &service_rec,
+ IB_SA_SERVICE_REC_SERVICE_NAME,
+ XSCORE_SA_QUERY_TIMEOUT, GFP_KERNEL,
+ &service_rec_callback, port, &query);
+ port->rec_poller_state = XDS_RECP_SAREC_DONE;
+ if (ret) {
+ XDS_INFO("ib_sa_service_rec_query: failed %d ret,", ret);
+ XDS_INFO(" port: %d,", port->port_num);
+ XDS_INFO(" GUID: 0x%llx\n:", port->guid);
+ port->counters[PORT_XDS_SA_QUERY_ERROR_COUNTER]++;
+ return ret;
+ }
+ port->counters[PORT_XDS_SA_QUERY_COUNTER]++;
+ /*
+ * This is get out of jail in case we do not
+ * get any completion, must never happen
+ */
+ if (!wait_for_completion_timeout(&port->sa_query_done,
+ msecs_to_jiffies
+ (XSCORE_SA_QUERY_TIMEOUT * 10))) {
+ XDS_ERROR("%s: completion timeout, port: %d, GUID: 0x%llx\n:",
+ __func__, port->port_num, port->guid);
+ return -ETIMEDOUT;
+ }
+ return port->sa_query_status;
+}
+
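+/*
+ * Fill in the common MAD header for an XDS query. The transaction id
+ * carries the MAD agent's hi_tid in the upper 32 bits and the port
+ * number in the lower bits.
+ */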
+static void create_ib_mad_header(struct xscore_port *port,
+ struct ib_xds_mad *xds_mad)
+{
+ struct ib_mad_hdr *mad_hdr = &xds_mad->mad_hdr;
+
+ mad_hdr->base_version = IB_MGMT_BASE_VERSION;
+ mad_hdr->mgmt_class = XSIGO_MGMT_CLASS;
+ mad_hdr->class_version = XSIGO_MGMT_CLASS_VERSION;
+ mad_hdr->method = IB_MGMT_METHOD_GET;
+ mad_hdr->attr_id = cpu_to_be16(IB_MAD_ATTR_XCM_REQUEST);
+ mad_hdr->tid = port->mad_agent->hi_tid;
+ mad_hdr->tid <<= 32;
+ mad_hdr->tid |= port->port_num;
+ mad_hdr->tid = cpu_to_be64(mad_hdr->tid);
+}
+
+/*
+ * Capability word sent in the XDS request: bit 31 is always set, and
+ * bits 16 and up advertise the enabled resources, one bit per entry in
+ * xcpm_resource_flags (bit 16 = VNIC, bit 17 = VHBA).
+ */
+static void xds_send_cap_info(struct xds_request *request)
+{
+ uint32_t cap_info, i;
+
+ cap_info = 1U << 31;
+
+ for (i = 0; i < RESOURCE_FLAG_INDEX_MAX; i++) {
+ if (xcpm_resource_flags & (1 << i))
+ cap_info = (cap_info & 0xffff0000) | (1 << (16 + i));
+ }
+ request->reserved = htonl(cap_info);
+}
+
+/*
+ * Create a XDS query packet
+ */
+static void create_xds_mad_req(struct xscore_port *port,
+ struct xds_request *request)
+{
+ char h[16 + 1];
+ char tmp_os_version[64];
+ unsigned long system_id_ul;
+ int ret;
+
+ request->server_record.port_id = cpu_to_be64(port->guid);
+ strncpy(request->hostname, hostname_str, XSIGO_MAX_HOSTNAME);
+ snprintf(tmp_os_version, sizeof(tmp_os_version) - 1, "%s:xg-%s",
+ init_utsname()->release, XSCORE_VERSION);
+ if (strlen(tmp_os_version) >= sizeof(request->os_version)) {
+ snprintf(request->os_version, sizeof(request->os_version) - 1,
+ "%s", init_utsname()->release);
+ snprintf(request->build_version,
+ sizeof(request->build_version) - 1, "xg-%s",
+ XSCORE_VERSION);
+ } else {
+ snprintf(request->os_version, sizeof(request->os_version) - 1,
+ "%s:xg-%s", init_utsname()->release, XSCORE_VERSION);
+ }
+ strcpy(request->os_arch, init_utsname()->machine);
+ request->os_type = htonl(RESOURCE_OS_TYPE_LINUX);
+ request->os_version[sizeof(request->os_version) - 1] = 0;
+ request->os_arch[sizeof(request->os_arch) - 1] = 0;
+
+ request->fw_version = cpu_to_be64(port->xs_dev->fw_ver);
+ request->hw_version = htonl(port->xs_dev->hw_ver);
+ request->driver_version = htonl(XSIGO_LINUX_DRIVER_VERSION);
+ if (system_id_str[0]) {
+ ret = kstrtoul(system_id_str + 16, 16, &system_id_ul);
+ request->system_id_l = cpu_to_be64(system_id_ul);
+ memcpy(h, system_id_str, 16);
+ h[16] = 0;
+ ret = kstrtoul(h, 16, &system_id_ul);
+ request->system_id_h = cpu_to_be64(system_id_ul);
+ }
+ xds_send_cap_info(request);
+}
+
+/*
+ * Send completion handler for XDS query
+ */
+static void xds_send_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+ struct xscore_port *port = agent->context;
+
+ switch (mad_send_wc->status) {
+ case IB_WC_SUCCESS:
+ break;
+ default:
+ break;
+ }
+
+ XDS_INFO("%s, Unmapping send buffer: status %d, Port GUID: 0x%llx\n",
+ __func__, mad_send_wc->status, port->guid);
+
+ ib_destroy_ah(msg->ah);
+ ib_free_send_mad(msg);
+}
+
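+/*
+ * Parse the XCM list returned by an XDS/XDDS query and allocate an XSMP
+ * session for every XCM entry reported by the fabric.
+ */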
+static int xds_check_xcm_record(struct xscore_port *port,
+ struct ib_xds_mad *xds_mad)
+{
+ struct xcm_list list;
+ int i;
+
+ XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+ /*
+ * Skip server_info structure size in response
+ */
+ memcpy(&list, xds_mad->data + sizeof(struct server_info), sizeof(list));
+
+ XDS_INFO("%s: port 0x%llx, XCM list count %d\n", __func__,
+ port->guid, list.count);
+
+ if (list.count > MAX_XCFM_COUNT) {
+ /*
+ * Print error
+ */
+ XDS_ERROR("%s GUID: 0x%llx, list count range error %d\n",
+ __func__, port->guid, list.count);
+ return -EINVAL;
+ }
+ if (list.count && list.xcm_version != XCM_REC_VERSION) {
+ XDS_ERROR("%s GUID: 0x%llx, Bad XCM version %d\n",
+ __func__, port->guid, list.xcm_version);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < list.count; i++) {
+ u64 dguid;
+ u16 dlid;
+ /*
+ * Go through all the XSMP sessions and verify for any duplicate
+ */
+ struct xcfm_record *xcmp = &list.xcms[i];
+
+ dguid = be64_to_cpu(xcmp->port_id);
+ dlid = be16_to_cpu(xcmp->xcm_lid);
+ XDS_INFO("Port GUID: 0x%llx, XCM lid: 0x%x, XCM guid: 0x%llx\n",
+ port->guid, dlid, dguid);
+ xsmp_allocate_xsmp_session(port, dguid, dlid);
+ }
+ if (list.count) {
+ port->counters[PORT_XDS_LIST_COUNT_COUNTER]++;
+ set_bit(XSCORE_SP_PRESENT, &port->flags);
+ clear_bit(XSCORE_SP_NOT_PRESENT, &port->flags);
+ } else {
+ port->counters[PORT_XDS_LIST_COUNT_ZERO_COUNTER]++;
+ set_bit(XSCORE_SP_NOT_PRESENT, &port->flags);
+ clear_bit(XSCORE_SP_PRESENT, &port->flags);
+ }
+
+ return 0;
+}
+
+/*
+ * Receive completion handler for XDS query
+ */
+static void xds_recv_handler(struct ib_mad_agent *mad_agent,
+ struct ib_mad_recv_wc *mad_recv_wc)
+{
+ struct xscore_port *port = mad_agent->context;
+
+ XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+ port->counters[PORT_XDS_XDS_QUERY_COUNTER]++;
+ port->mad_recv_wc = mad_recv_wc;
+ complete(&port->xds_query_done);
+}
+
+/*
+ * This routine queries XDS for XCM record. This is synchronous and needs to
+ * called in thread/workq context
+ */
+int xscore_query_xds_xcm_rec(struct xscore_port *port)
+{
+ struct xscore_dev *xs_dev = port->xs_dev;
+ struct ib_ah_attr ah_attr;
+ struct ib_mad_recv_wc *mad_recv_wc;
+ struct ib_xds_mad *xds_mad;
+ struct xds_request *request;
+ struct ib_port_attr port_attr;
+ int ret;
+
+ XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+ port->send_buf = ib_create_send_mad(port->mad_agent, 1, 0, 0,
+ IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+ GFP_KERNEL);
+ port->rec_poller_state = XDS_RECP_CREATEMAD_DONE;
+ if (IS_ERR(port->send_buf)) {
+ ret = PTR_ERR(port->send_buf);
+ IB_ERROR("ib_create_send_mad failed, error %d, GUID: 0x%llx\n",
+ ret, port->guid);
+ return ret;
+ }
+ /*
+ * Create XDS MAD query packet
+ */
+ xds_mad = port->send_buf->mad;
+ memset(xds_mad, 0, sizeof(*xds_mad));
+ request = (struct xds_request *)xds_mad->data;
+ create_ib_mad_header(port, xds_mad);
+ create_xds_mad_req(port, request);
+
+ memset(&ah_attr, 0, sizeof(ah_attr));
+ ah_attr.dlid = port->xds_lid;
+ (void)ib_query_port(xs_dev->device, port->port_num, &port_attr);
+ ah_attr.sl = port_attr.sm_sl;
+ ah_attr.port_num = port->port_num;
+
+ port->send_buf->ah = ib_create_ah(port->mad_agent->qp->pd, &ah_attr);
+ if (IS_ERR(port->send_buf->ah)) {
+ ib_free_send_mad(port->send_buf);
+ ret = PTR_ERR(port->send_buf->ah);
+ IB_ERROR("ib_create_ah failed, error %d, GUID: 0x%llx\n",
+ ret, port->guid);
+ return ret;
+ }
+ port->rec_poller_state = XDS_RECP_CREATEAH_DONE;
+
+ port->send_buf->retries = 2;
+ port->send_buf->timeout_ms = XSCORE_SA_QUERY_TIMEOUT;
+
+ init_completion(&port->xds_query_done);
+
+ ret = ib_post_send_mad(port->send_buf, NULL);
+ if (ret) {
+ IB_ERROR("ib_post_send_mad failed, error %d, GUID: 0x%llx\n",
+ ret, port->guid);
+ ib_destroy_ah(port->send_buf->ah);
+ ib_free_send_mad(port->send_buf);
+ port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]++;
+ port->send_buf = 0;
+ return ret;
+ }
+ port->rec_poller_state = XDS_RECP_SENDMAD_DONE;
+ if (!wait_for_completion_timeout(&port->xds_query_done,
+ msecs_to_jiffies
+ (XSCORE_SA_QUERY_TIMEOUT * 10))) {
+ XDS_ERROR("%s: completion timeout, port: %d, GUID: 0x%llx\n:",
+ __func__, port->port_num, port->guid);
+ port->counters[PORT_XDS_XDS_QUERY_TOUT_COUNTER]++;
+ return -ETIMEDOUT;
+ }
+ mad_recv_wc = port->mad_recv_wc;
+ if (!mad_recv_wc || mad_recv_wc->wc->status != IB_WC_SUCCESS) {
+ if (mad_recv_wc)
+ ret = mad_recv_wc->wc->status;
+ else
+ ret = -EINVAL;
+ } else
+ xds_check_xcm_record(port,
+ (struct ib_xds_mad *)mad_recv_wc->
+ recv_buf.mad);
+ ib_free_recv_mad(port->mad_recv_wc);
+ port->rec_poller_state = XDS_RECP_FREEMAD_DONE;
+ port->mad_recv_wc = 0;
+ return ret;
+}
+
+static int xs_send_xds_disc_msg(struct xscore_port *port)
+{
+ int ret;
+ struct xdds_disc_req xd_msg;
+
+ port->counters[PORT_XDS_XDS_QUERY_COUNTER]++;
+
+ memset(&xd_msg, 0, sizeof(struct xdds_disc_req));
+ xd_msg.xhdr.type = htons(XDP_MSG_TYPE_DISC_SOL);
+ xd_msg.xhdr.flags = htons(XDP_FLAGS_REQ);
+ xd_msg.xhdr.len = htons(sizeof(struct xdds_disc_req));
+
+ create_xds_mad_req(port, &xd_msg.req);
+ ret = xs_ud_send_msg(port, 0, &xd_msg, sizeof(xd_msg), XS_UD_COPY_MSG);
+ if (ret) {
+ XDDS_ERROR("xs_ud_send_msg: port GUID %llx failed, error %d\n",
+ port->guid, ret);
+ port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]++;
+ }
+ return ret;
+}
+
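+/*
+ * Periodic XCM record poller: IB ports query the SA and then XDS for XCM
+ * records, Ethernet ports send an XDDS discovery solicitation. The work
+ * re-arms itself every 10 seconds after a failure and every 20 seconds
+ * otherwise, unless the port is shutting down.
+ */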
+static void xcm_rec_poller(struct work_struct *work)
+{
+ struct xscore_port *port = container_of(work, struct xscore_port,
+ poll_work.work);
+ unsigned long flags;
+ struct ib_port_attr port_attr;
+ int ret = 0;
+
+ xscore_set_wq_state(XSCORE_DWQ_POLL_WORK);
+ port->rec_poller_state = XDS_RECP_START;
+ xsmp_cleanup_stale_xsmp_sessions(port, 0);
+
+ (void)ib_query_port(port->xs_dev->device, port->port_num, &port_attr);
+ port->rec_poller_state = XDS_RECP_QUERY_IB_DONE;
+
+ if (port_attr.state != IB_PORT_ACTIVE) {
+ XDS_INFO("%s: Port %d, GUID: 0x%llx, Not Active\n",
+ __func__, port->port_num, port->guid);
+ port->counters[PORT_XDS_PORT_NOT_ACTIVE_COUNTER]++;
+ } else {
+ if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ ret = xscore_query_svc_record(port);
+ if (!ret)
+ ret = xscore_query_xds_xcm_rec(port);
+ } else
+ (void)xs_send_xds_disc_msg(port);
+ }
+ if (ret)
+ port->poll_interval = msecs_to_jiffies(1000 * 10);
+ else
+ port->poll_interval = msecs_to_jiffies(1000 * 20);
+ spin_lock_irqsave(&port->lock, flags);
+ if (!test_bit(XSCORE_PORT_SHUTDOWN, &port->flags))
+ queue_delayed_work(port->port_wq,
+ &port->poll_work, port->poll_interval);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ port->rec_poller_state = XDS_RECP_DONE;
+ port->rec_poller_time = jiffies;
+ xscore_clear_wq_state(XSCORE_DWQ_POLL_WORK);
+
+}
+
+static void xscore_destroy_port(struct xscore_port *port)
+{
+ IB_FUNCTION("%s: port %d\n", __func__, port->port_num);
+ if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ ib_unregister_mad_agent(port->mad_agent);
+ port->mad_agent = 0;
+ } else
+ xs_ud_destroy(port);
+}
+
+/*
+ * Convert GUID to MAC address by stripping out bytes 3 and 4 == FF0E
+ * and toggling the locally administered bit (0x02) of byte 0
+ */
+static void convert_guid_to_mac(u64 guid, u64 *mac)
+{
+ u64 t1;
+
+ t1 = guid & 0x0000000000FFFFFFLL;
+ guid >>= 16;
+ t1 |= (guid & 0x0000FFFFFF000000LL);
+ *mac = t1;
+ *mac ^= (1ULL << 41);
+}
+
+/*
+ * Initialize Query based on port information
+ */
+static int xscore_init_port(struct xscore_port *port)
+{
+ struct xscore_dev *xs_dev = port->xs_dev;
+ struct ib_port_attr port_attr;
+ int ret;
+
+ IB_FUNCTION("%s\n", __func__);
+
+ ret = ib_query_gid(xs_dev->device, port->port_num, 0, &port->sgid);
+ if (ret) {
+ IB_ERROR("xscore_init_port: ib_query_gid GUID 0x%llx %d\n",
+ port->guid, ret);
+ return ret;
+ }
+ /*
+ * Get port attributes and check the type of the port
+ */
+ ret = ib_query_port(xs_dev->device, port->port_num, &port_attr);
+ if (ret) {
+ IB_ERROR("xscore_init_port: ib_query_port GUID: 0x%llx, %d\n",
+ port->guid, ret);
+ return ret;
+ }
+ port->link_layer = rdma_port_link_layer(xs_dev->device, port->port_num);
+ port->guid = be64_to_cpu(port->sgid.global.interface_id);
+ port->lid = port_attr.lid;
+ port->sm_lid = port_attr.sm_lid;
+
+ XDS_PRINT("Port Number: %d, ", port->port_num);
+ XDS_PRINT("GUID: 0x%llx, ", port->guid);
+ XDS_PRINT("LID: 0x%x, ", port->lid);
+ XDS_PRINT("SM LID: 0x%x, ", port->sm_lid);
+ XDS_PRINT("Mode: ");
+ XDS_PRINT("%s\n",
+ port->link_layer == IB_LINK_LAYER_INFINIBAND ? "IB" : "ETH");
+
+ if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ ret = xscore_init_mad_agent(port);
+ } else {
+ u64 mac;
+
+ /*
+ * Convert to MAC only if valid GUID.
+ * In case of link down, GUID is zero
+ */
+ if (port->guid) {
+ convert_guid_to_mac(port->guid, &mac);
+ port->guid = mac;
+ }
+ ret = xs_ud_create(port, xs_ud_callback, port);
+ }
+ return ret;
+}
+
+static void xscore_remove_port(struct xscore_port *port)
+{
+ /*
+ * Set a state bit to tell others we are going down
+ */
+ IB_FUNCTION("%s: port %d\n", __func__, port->port_num);
+
+ flush_workqueue(port->port_wq);
+ destroy_workqueue(port->port_wq);
+ port->port_wq = 0;
+ xscore_destroy_port(port);
+ list_del(&port->port_list);
+ mutex_lock(&xscore_port_mutex);
+ list_del(&port->gport_list);
+ mutex_unlock(&xscore_port_mutex);
+ xcpm_port_remove_proc_entry(port);
+ kfree(port);
+}
+
+/*
+ * Initialize a port context
+ */
+static struct xscore_port *xscore_add_port(struct xscore_dev *device,
+ u8 port_num)
+{
+ struct xscore_port *port;
+ char name[32];
+ int ret;
+
+ IB_FUNCTION("%s: port %d\n", __func__, port_num);
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return NULL;
+ port->xs_dev = device;
+ port->port_num = port_num;
+
+ INIT_LIST_HEAD(&port->xsmp_list);
+ spin_lock_init(&port->lock);
+
+ INIT_WORK(&port->ework, xscore_port_event_handler);
+ INIT_DELAYED_WORK(&port->poll_work, xcm_rec_poller);
+ sprintf(name, "xs_wq:%d", port_num);
+ port->port_wq = create_singlethread_workqueue(name);
+ if (!port->port_wq)
+ goto err_ret;
+
+ ret = xscore_init_port(port);
+ if (ret) {
+ XDS_ERROR("xscore_init_port failed %d\n", ret);
+ goto err_ret1;
+ }
+
+ if (port->guid)
+ xcpm_port_add_proc_entry(port);
+ /*
+ * Now start XCM record polling
+ */
+ queue_delayed_work(port->port_wq,
+ &port->poll_work, port->poll_interval);
+
+ return port;
+err_ret1:
+ destroy_workqueue(port->port_wq);
+err_ret:
+ kfree(port);
+ return NULL;
+}
+
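+/*
+ * Deferred port event handler: refresh the GID/MAC after an address
+ * change on Ethernet ports, pick up a new SM LID when requested, and
+ * notify the XSMP ULPs whether the port is up or down.
+ */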
+static void xscore_port_event_handler(struct work_struct *work)
+{
+ struct xscore_port *port =
+ container_of(work, struct xscore_port, ework);
+ struct ib_port_attr port_attr;
+ int port_up;
+
+ xscore_set_wq_state(XSCORE_WQ_PORT_EVENTH);
+ if (port->link_layer == IB_LINK_LAYER_ETHERNET &&
+ test_bit(XSCORE_PORT_LID_CHANGE, &port->flags)) {
+ u64 mac;
+
+ clear_bit(XSCORE_PORT_LID_CHANGE, &port->flags);
+ ib_query_gid(port->xs_dev->device, port->port_num, 0,
+ &port->sgid);
+ port->guid = be64_to_cpu(port->sgid.global.interface_id);
+ convert_guid_to_mac(port->guid, &mac);
+ port->guid = mac;
+ xcpm_port_add_proc_entry(port);
+ }
+
+ (void)ib_query_port(port->xs_dev->device, port->port_num, &port_attr);
+
+ /*
+ * In the case of SM lid change update with new one
+ */
+ if (xscore_notify_ulps
+ && (test_and_clear_bit(XSCORE_PORT_SMLID_CHANGE, &port->flags))) {
+ pr_info("%s port%d SM Update ", __func__, port->port_num);
+ pr_info(" [New %x old %x]\n", port_attr.sm_lid, port->sm_lid);
+ port->sm_lid = port_attr.sm_lid;
+ }
+
+ /*
+ * We have seen the ACTIVE event come up, but port is still not ACTIVE
+ * Make it active if we get ACTIVE event and port is still not active
+ */
+ if (port->pevent == IB_EVENT_PORT_ACTIVE
+ || port_attr.state == IB_PORT_ACTIVE) {
+ pr_info("xscore: Port: %llx UP\n", port->guid);
+ port_up = 1;
+ port->lid = port_attr.lid;
+ port->sm_lid = port_attr.sm_lid;
+ } else {
+ port_up = 0;
+ pr_info("xscore: Port: %llx DOWN\n", port->guid);
+ }
+ xsmp_ulp_notify(port, port_up);
+ xscore_clear_wq_state(XSCORE_WQ_PORT_EVENTH);
+}
+
+/*
+ * IB stack event handler callback
+ */
+static void xscore_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct xscore_dev *xs_dev =
+ ib_get_client_data(event->device, &xscore_client);
+ struct xscore_port *port;
+ int port_num = event->element.port_num;
+
+ if (!xs_dev || xs_dev->device != event->device)
+ return;
+
+ list_for_each_entry(port, &xs_dev->port_list, port_list) {
+ if (port->port_num == port_num)
+ goto found;
+ }
+ return;
+
+found:
+ port->pevent = event->event;
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_PORT_ACTIVE:
+ queue_work(port->port_wq, &port->ework);
+ break;
+ case IB_EVENT_LID_CHANGE:
+ /*
+ * Used by IBOE
+ */
+ set_bit(XSCORE_PORT_LID_CHANGE, &port->flags);
+ queue_work(port->port_wq, &port->ework);
+ break;
+ case IB_EVENT_PKEY_CHANGE:
+ break;
+ case IB_EVENT_SM_CHANGE:
+ if (xscore_notify_ulps) {
+ set_bit(XSCORE_PORT_SMLID_CHANGE, &port->flags);
+ queue_work(port->port_wq, &port->ework);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static const u64 min_fw_version = (2ULL << 32) | (7ULL << 16) | (0ULL << 0);
+
+static int xscore_is_mlx4_fw_down_rev(u64 fw_ver)
+{
+
+ return (fw_ver < min_fw_version);
+}
+
+/*
+ * This callback gets called for every HCA in the system
+ * This gets executed for the most part in the register call context
+ */
+static void xscore_add_one(struct ib_device *device)
+{
+ struct xscore_dev *xs_dev;
+ struct ib_device_attr dev_attr;
+ int p;
+ struct xscore_port *port;
+
+ IB_FUNCTION("%s: device: %s\n", __func__, device->name);
+
+ if (ib_query_device(device, &dev_attr)) {
+ IB_ERROR("Query device failed for %s\n", device->name);
+ return;
+ }
+
+ /* See if this is some form of a Mellanox ConnectX card */
+ if (strncmp(device->name, "mlx4", sizeof("mlx4") - 1) == 0) {
+ if (xscore_is_mlx4_fw_down_rev(dev_attr.fw_ver)) {
+ pr_info("Firmware on device \"%s\" (%d,%d,%d) is below",
+ device->name,
+ (int)((dev_attr.fw_ver >> 32) & 0xffff),
+ (int)((dev_attr.fw_ver >> 16) & 0xffff),
+ (int)(dev_attr.fw_ver & 0xffff));
+ pr_info(" min needed to support ethernet transport");
+ pr_info("Minimum firmware version is %d.%d.%d\n",
+ (int)((min_fw_version >> 32) & 0xffff),
+ (int)((min_fw_version >> 16) & 0xffff),
+ (int)(min_fw_version & 0xffff));
+ }
+ }
+
+ xs_dev = kzalloc(sizeof(*xs_dev), GFP_KERNEL);
+ if (!xs_dev)
+ return;
+
+ INIT_LIST_HEAD(&xs_dev->port_list);
+ if (strstr(device->name, "xgc"))
+ xs_dev->is_shca = 1;
+ xs_dev->device = device;
+ xs_dev->dev_attr = dev_attr;
+ xs_dev->fw_ver = dev_attr.fw_ver;
+ xs_dev->hw_ver = dev_attr.hw_ver;
+ xs_dev->pd = ib_alloc_pd(device);
+ if (IS_ERR(xs_dev->pd))
+ goto free_dev;
+
+ xs_dev->mr = ib_get_dma_mr(xs_dev->pd,
+ IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE);
+ if (IS_ERR(xs_dev->mr))
+ goto err_pd;
+
+ for (p = 1; p <= device->phys_port_cnt; ++p) {
+ port = xscore_add_port(xs_dev, p);
+ if (port) {
+ list_add_tail(&port->port_list, &xs_dev->port_list);
+ mutex_lock(&xscore_port_mutex);
+ list_add_tail(&port->gport_list, &xscore_port_list);
+ mutex_unlock(&xscore_port_mutex);
+ }
+ }
+
+ ib_set_client_data(device, &xscore_client, xs_dev);
+
+ INIT_IB_EVENT_HANDLER(&xs_dev->event_handler, xs_dev->device,
+ xscore_event_handler);
+ (void)ib_register_event_handler(&xs_dev->event_handler);
+
+ return;
+
+err_pd:
+ ib_dealloc_pd(xs_dev->pd);
+free_dev:
+ kfree(xs_dev);
+}
+
+/*
+ * Remove a HCA from the system, happens during driver unload when we unregister
+ * from IB stack
+ */
+static void xscore_remove_one(struct ib_device *device)
+{
+ struct xscore_dev *xs_dev;
+ struct xscore_port *port;
+ struct xscore_port *tmp_port;
+ unsigned long flags;
+
+ IB_FUNCTION("%s: device: %s\n", __func__, device->name);
+
+ xs_dev = ib_get_client_data(device, &xscore_client);
+ ib_unregister_event_handler(&xs_dev->event_handler);
+ /*
+ * Now go through the port list and shut down everything you can
+ */
+ list_for_each_entry_safe(port, tmp_port, &xs_dev->port_list,
+ port_list) {
+ spin_lock_irqsave(&port->lock, flags);
+ set_bit(XSCORE_PORT_SHUTDOWN, &port->flags);
+ spin_unlock_irqrestore(&port->lock, flags);
+ cancel_delayed_work(&port->poll_work);
+ flush_workqueue(port->port_wq);
+ cancel_delayed_work(&port->poll_work);
+ xsmp_cleanup_stale_xsmp_sessions(port, 1);
+ xscore_remove_port(port);
+ }
+ ib_dereg_mr(xs_dev->mr);
+ ib_dealloc_pd(xs_dev->pd);
+ kfree(xs_dev);
+}
+
+/*
+ * Driver load entry point
+ */
+static int __init xscore_init(void)
+{
+ int ret;
+
+ if (!hostname)
+ strncpy(hostname_str, init_utsname()->nodename,
+ XSIGO_MAX_HOSTNAME);
+ else
+ strncpy(hostname_str, hostname, XSIGO_MAX_HOSTNAME);
+ hostname_str[XSIGO_MAX_HOSTNAME] = 0;
+
+ system_id_str[0] = 0;
+ if (system_id)
+ strncpy(system_id_str, system_id, sizeof(system_id_str) - 1);
+ system_id_str[sizeof(system_id_str) - 1] = 0;
+
+ xg_vmk_kompat_init();
+
+ INIT_LIST_HEAD(&xscore_port_list);
+ mutex_init(&xscore_port_mutex);
+
+ ret = xscore_create_procfs_entries();
+ if (ret)
+ return ret;
+
+ xsmp_module_init();
+ /*
+ * Now register with SA
+ */
+ ib_sa_register_client(&xscore_sa_client);
+
+ /*
+ * Now register with IB framework
+ */
+ ret = ib_register_client(&xscore_client);
+ if (ret) {
+ IB_ERROR("couldn't register IB client\n");
+ goto err1;
+ }
+ ret = xscore_uadm_init();
+ if (ret)
+ goto err2;
+ /* Wait for Sessions to come up */
+ xscore_wait_for_sessions(1);
+ return ret;
+err2:
+ ib_unregister_client(&xscore_client);
+err1:
+ ib_sa_unregister_client(&xscore_sa_client);
+ xsmp_module_destroy();
+ xscore_remove_procfs_entries();
+ return ret;
+}
+
+/*
+ * Driver unload entry point
+ */
+static void __exit xscore_exit(void)
+{
+ xscore_uadm_destroy();
+ ib_unregister_client(&xscore_client);
+ ib_sa_unregister_client(&xscore_sa_client);
+ xsmp_module_destroy();
+ xscore_remove_procfs_entries();
+ xg_vmk_kompat_cleanup();
+}
+
+module_init(xscore_init);
+module_exit(xscore_exit);
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSCORE_PRIV_H_
+#define _XSCORE_PRIV_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+
+#include <linux/version.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_fmr_pool.h>
+#include <asm/byteorder.h>
+
+#include "xs_compat.h"
+#include "xscore_xds.h"
+#include "xsmp_common.h"
+#include "xsmp_session.h"
+
+#define MODULE_NAME "XSCORE"
+
+#define XSCORE_WQ_XDDS_HANDLER 0x1
+#define XSCORE_WQ_PORT_EVENTH 0x2
+#define XSCORE_WQ_XSMP_PROC_MSG 0x3
+#define XSCORE_DWQ_POLL_WORK 0x4
+#define XSCORE_DWQ_SM_WORK 0x5
+
+extern int xscore_debug;
+extern unsigned long xscore_wait_time;
+extern int xscore_force_sm_change;
+extern struct mutex xscore_port_mutex;
+extern unsigned long xscore_wq_state;
+extern unsigned long xscore_wq_jiffies;
+extern unsigned long xscore_last_wq;
+
+enum {
+ DEBUG_IB_INFO = 0x00000001,
+ DEBUG_IB_FUNCTION = 0x00000002,
+ DEBUG_XDS_INFO = 0x00000004,
+ DEBUG_XDS_FUNCTION = 0x00000008,
+ DEBUG_XSMP_INFO = 0x00000010,
+ DEBUG_XSMP_FUNCTION = 0x00000020,
+ DEBUG_UADM_INFO = 0x00000040,
+ DEBUG_UADM_FUNCTION = 0x00000080,
+ DEBUG_XDDS_INFO = 0x00000100,
+ DEBUG_XDDS_FUNCTION = 0x00000200,
+};
+
+#define PRINT(level, x, fmt, arg...) \
+ printk(level "%s: " fmt, MODULE_NAME, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...) \
+ do { \
+ if (condition) \
+ printk(level "%s: %s: " fmt, \
+ MODULE_NAME, x, ##arg); \
+ } while (0)
+
+#define IB_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "IB", fmt, ##arg)
+#define IB_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "IB", fmt, ##arg)
+
+#define IB_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "IB", \
+ (xscore_debug & DEBUG_IB_FUNCTION), \
+ fmt, ##arg)
+
+#define IB_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "IB", \
+ (xscore_debug & DEBUG_IB_INFO), \
+ fmt, ##arg)
+
+#define XDS_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "XDS", fmt, ##arg)
+#define XDS_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "XDS", fmt, ##arg)
+
+#define XDS_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XDS", \
+ (xscore_debug & DEBUG_XDS_FUNCTION), \
+ fmt, ##arg)
+
+#define XDS_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XDS", \
+ (xscore_debug & DEBUG_XDS_INFO), \
+ fmt, ##arg)
+
+#define XSMP_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "XSMP", fmt, ##arg)
+#define XSMP_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+
+#define XSMP_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XSMP", \
+ (xscore_debug & DEBUG_XSMP_FUNCTION), \
+ fmt, ##arg)
+
+#define XSMP_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XSMP", \
+ (xscore_debug & DEBUG_XSMP_INFO), \
+ fmt, ##arg)
+
+#define UADM_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "UADM", fmt, ##arg)
+#define UADM_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "UADM", fmt, ##arg)
+
+#define UADM_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "UADM", \
+ (xscore_debug & DEBUG_UADM_FUNCTION), \
+ fmt, ##arg)
+
+#define UADM_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "UADM", \
+ (xscore_debug & DEBUG_UADM_INFO), \
+ fmt, ##arg)
+
+#define XDDS_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "XDDS", fmt, ##arg)
+#define XDDS_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "XDDS", fmt, ##arg)
+
+#define XDDS_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XDDS", \
+ (xscore_debug & DEBUG_XDDS_FUNCTION), \
+ fmt, ##arg)
+
+#define XDDS_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XDDS", \
+ (xscore_debug & DEBUG_XDDS_INFO), \
+ fmt, ##arg)
+
+/*
+ * This structure represents context for the HCA
+ */
+struct xscore_dev {
+ struct list_head port_list;
+ struct ib_device *device;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct ib_event_handler event_handler;
+ /* We can remove fw_ver and hw_ver */
+ struct ib_device_attr dev_attr;
+ u64 fw_ver;
+ u32 hw_ver;
+ u32 vendor_part_id;
+ u8 is_shca;
+};
+
+enum {
+ PORT_XDS_PORT_NOT_ACTIVE_COUNTER,
+ PORT_XDS_SA_QUERY_ERROR_COUNTER,
+ PORT_XDS_SA_QUERY_TOUT_COUNTER,
+ PORT_XDS_SA_QUERY_COUNTER,
+ PORT_XDS_XDS_QUERY_ERROR_COUNTER,
+ PORT_XDS_XDS_QUERY_TOUT_COUNTER,
+ PORT_XDS_XDS_QUERY_COUNTER,
+ PORT_XDS_LIST_COUNT_ZERO_COUNTER,
+ PORT_XDS_LIST_COUNT_COUNTER,
+ PORT_MAX_COUNTERS
+};
+enum {
+ XDS_RECP_START = 1,
+ XDS_RECP_QUERY_IB_DONE,
+ XDS_RECP_SAUPDATE_DONE,
+ XDS_RECP_SAREC_DONE,
+ XDS_RECP_CREATEMAD_DONE,
+ XDS_RECP_CREATEAH_DONE,
+ XDS_RECP_SENDMAD_DONE,
+ XDS_RECP_FREEMAD_DONE,
+ XDS_RECP_DONE
+};
+
+/*
+ * This represents context fo each port
+ */
+/* TBD Add state in this a- PORT_ACTIVE ,b- XDS RECORD/ NO XDS RECORD */
+struct xscore_port {
+ spinlock_t lock;
+ struct xscore_dev *xs_dev; /* Back pointer to HCA context */
+ struct list_head port_list;
+ struct list_head gport_list;
+ unsigned long flags;
+#define XSCORE_PORT_SHUTDOWN 1
+#define XSCORE_PORT_LID_CHANGE 2
+#define XSCORE_PORT_PROCFS_CREATED 3
+#define XSCORE_SP_PRESENT 4
+#define XSCORE_SP_NOT_PRESENT 5
+#define XSCORE_FORCE_SM_CHANGE 6
+#define XSCORE_PORT_SMLID_CHANGE 7
+ u8 port_num;
+ struct workqueue_struct *port_wq;
+ struct delayed_work poll_work;
+ enum ib_event_type pevent;
+ struct work_struct ework;
+ int poll_interval;
+ int rec_poller_state;
+ unsigned long rec_poller_time;
+ struct ib_mad_agent *mad_agent;
+ struct ib_mad_send_buf *send_buf;
+ struct completion sa_query_done;
+ int sa_query_status;
+ struct completion xds_query_done;
+ struct xcm_list xcm_list;
+ struct ib_mad_recv_wc *mad_recv_wc;
+ u64 guid;
+ union ib_gid sgid;
+ u16 lid;
+ u16 sm_lid;
+ u16 xds_lid;
+ u64 xds_guid;
+ enum rdma_link_layer link_layer;
+ struct ib_ud_ctx *ib_ud_ctx;
+ struct list_head xsmp_list;
+ u32 counters[PORT_MAX_COUNTERS];
+};
+
+#define XS_UD_COPY_MSG 0x1
+
+static inline void xscore_set_wq_state(unsigned long state)
+{
+}
+
+static inline void xscore_clear_wq_state(unsigned long state)
+{
+}
+
+extern int xs_vpci_bus_init(void);
+extern void xs_vpci_bus_remove(void);
+
+extern int xs_ud_create(struct xscore_port *pinfop,
+ void (*callback)(void *, void *, int), void *arg);
+extern void xs_ud_destroy(struct xscore_port *pinfop);
+
+extern int xs_ud_send_msg(struct xscore_port *pinfop, uint8_t *macp,
+ void *msgp, int len, int flags);
+extern void xs_ud_free(void *msg);
+
+void xsmp_module_init(void);
+void xsmp_module_destroy(void);
+void xsmp_allocate_xsmp_session(struct xscore_port *port, u64 guid, u16 lid);
+void xsmp_cleanup_stale_xsmp_sessions(struct xscore_port *port, int force);
+/* Externs*/
+extern struct ib_sa_client xscore_sa_client;
+
+#endif /* _XSCORE_PRIV_H_ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/utsname.h>
+
+#include "xscore_priv.h"
+#include "xs_compat.h"
+#include "xscore.h"
+#include "xsmp.h"
+
+#define PFX "STATS"
+
+unsigned long xscore_wq_state;
+unsigned long xscore_wq_jiffies;
+unsigned long xscore_last_wq;
+
+struct proc_dir_entry *proc_root_xscore = NULL;
+struct proc_dir_entry *proc_root_xcpm = NULL;
+struct proc_dir_entry *proc_root_xcpm_info = NULL;
+struct proc_dir_entry *proc_root_xcpm_links = NULL;
+struct proc_dir_entry *proc_root_xcpm_ports = NULL;
+
+static char *ib_port_phys_state_str[] = {
+ "0: Link Down",
+ "1: Sleep",
+ "2: Polling",
+ "3: Disabled",
+ "4: Port Configuration Training",
+ "5: Link Up",
+ "6: Link Error Recovery",
+ "7: Phy Test",
+};
+
+static char *port_state2str[] = {
+ "PORT_NOP",
+ "PORT_DOWN",
+ "PORT_INIT",
+ "PORT_ARMED",
+ "PORT_ACTIVE",
+ "PORT_ACTIVE_DEFER",
+};
+
+static char *port_linkLayer2str[] = {
+ "Unspecified",
+ "Infiniband",
+ "Ethernet",
+};
+
+static int xcpm_port_proc_open_device(struct inode *inode, struct file *file);
+static int xcpm_port_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xcpm_port_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xcpm_xsmp_proc_open_device(struct inode *inode, struct file *file);
+static int xcpm_xsmp_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xcpm_xsmp_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xscore_proc_open_debug(struct inode *inode, struct file *file);
+static int xscore_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int xscore_proc_open_info(struct inode *inode, struct file *file);
+static int xscore_proc_read_info(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_info(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int xscore_proc_open_systemid(struct inode *inode, struct file *file);
+static int xscore_proc_read_systemid(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_systemid(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static const struct file_operations xcpm_port_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xcpm_port_proc_open_device,
+ .read = seq_read,
+ .write = xcpm_port_proc_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xcpm_xsmp_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xcpm_xsmp_proc_open_device,
+ .read = seq_read,
+ .write = xcpm_xsmp_proc_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xscore_debug_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xscore_proc_open_debug,
+ .read = seq_read,
+ .write = xscore_proc_write_debug,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xscore_info_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xscore_proc_open_info,
+ .read = seq_read,
+ .write = xscore_proc_write_info,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xscore_systemid_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xscore_proc_open_systemid,
+ .read = seq_read,
+ .write = xscore_proc_write_systemid,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
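+/*
+ * Convert the time elapsed since @ojiffies into hours, minutes and
+ * seconds for the procfs output below.
+ */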
+static void calc_time_fjiffies(unsigned long ojiffies, unsigned long *tsecs,
+ unsigned long *tmins, unsigned long *thrs)
+{
+ unsigned long tmp_tsecs = 0;
+ *tsecs = *tmins = *thrs = 0;
+
+ tmp_tsecs = jiffies_to_msecs(jiffies - ojiffies) / 1000;
+ *thrs = tmp_tsecs / (60 * 60);
+ *tmins = (tmp_tsecs / 60 - ((*thrs) * 60));
+ *tsecs = tmp_tsecs - ((*tmins) * 60) - ((*thrs) * 60 * 60);
+}
+
+static ssize_t xcpm_port_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ /* Do not clobber file->private_data; single_open() owns it */
+ struct xscore_port *ib_port = PDE_DATA(file_inode(file));
+
+ /* Any write clears the per-port counters */
+ memset(ib_port->counters, 0, sizeof(ib_port->counters));
+ return count;
+}
+
+static int xcpm_port_proc_read_device(struct seq_file *m, void *data)
+{
+ struct xscore_port *ib_port = NULL;
+ struct ib_port_attr port_attr;
+ u64 fw_ver;
+ unsigned long tsecs = 0, tmins = 0, thrs = 0;
+
+ ib_port = (struct xscore_port *)m->private;
+
+ (void)ib_query_port(ib_port->xs_dev->device, ib_port->port_num,
+ &port_attr);
+
+ seq_printf(m, "Device name: \t\t%s\n", ib_port->xs_dev->device->name);
+ fw_ver = ib_port->xs_dev->fw_ver;
+ seq_printf(m, "Device FW Version: \t%d.%d.%d\n", (int)(fw_ver >> 32),
+ (int)((fw_ver >> 16) & 0xFFFF), (int)(fw_ver & 0xFFFF));
+ seq_printf(m, "Port: \t\t\t%d\n", ib_port->port_num);
+ seq_printf(m, "Port %s: \t\t0x%llx\n",
+ ib_port->link_layer == IB_LINK_LAYER_ETHERNET ?
+ "MAC" : "GUID", ib_port->guid);
+ seq_printf(m, "Port PhysState: \t%s\n",
+ ib_port_phys_state_str[port_attr.phys_state]);
+ seq_printf(m, "Port State: \t\t%s\n", port_state2str[port_attr.state]);
+ if (ib_port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ seq_printf(m, "Port LID: \t\t%d\n", port_attr.lid);
+ seq_printf(m, "Port SM LID: \t\t%d\n", port_attr.sm_lid);
+ } else {
+ if (ib_port->xs_dev->is_shca == 0 && port_attr.active_mtu == 4)
+ port_attr.active_mtu = 5;
+ }
+ calc_time_fjiffies(ib_port->rec_poller_time, &tsecs, &tmins, &thrs);
+ seq_printf(m, "Last XCM poll :\t\t%lu hrs %lu mins %lu seconds\n",
+ thrs, tmins, tsecs);
+ seq_printf(m, "Port XCM poll state: \t%d\n", ib_port->rec_poller_state);
+
+ /*
+ * IB8KTBD: this reports the wrong MTU for the 8K IB MTU defined for
+ * the soft HCA
+ */
+ seq_printf(m, "Port MTU: \t\t%d (%d)\n", port_attr.active_mtu,
+ xg_ib_mtu_enum_to_int(port_attr.active_mtu));
+
+ seq_printf(m, "Port Link Layer: \t%s\n",
+ port_linkLayer2str[ib_port->link_layer]);
+ seq_puts(m, "\n");
+ if (ib_port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+ seq_printf(m, "Port XDS LID: \t\t%d\n", ib_port->xds_lid);
+ seq_printf(m, "Port XDS GUID: \t\t0x%llx\n", ib_port->xds_guid);
+ }
+ seq_puts(m, "\n");
+
+ seq_printf(m, "Port Not Active Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_PORT_NOT_ACTIVE_COUNTER]);
+ seq_printf(m, "SA Query Error Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_SA_QUERY_ERROR_COUNTER]);
+ seq_printf(m, "SA Query Timeout Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_SA_QUERY_TOUT_COUNTER]);
+ seq_printf(m, "SA Query Counter: \t\t%d\n",
+ ib_port->counters[PORT_XDS_SA_QUERY_COUNTER]);
+ seq_printf(m, "XDS Query Counter: \t\t%d\n",
+ ib_port->counters[PORT_XDS_XDS_QUERY_COUNTER]);
+ seq_printf(m, "XDS Query Error Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]);
+ seq_printf(m, "XDS List Count Zero Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_LIST_COUNT_ZERO_COUNTER]);
+ seq_printf(m, "XDS Query Timeout Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_XDS_QUERY_TOUT_COUNTER]);
+ seq_printf(m, "XDS List Count Counter: \t%d\n",
+ ib_port->counters[PORT_XDS_LIST_COUNT_COUNTER]);
+
+ return 0;
+}
+
+static int xcpm_port_proc_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, xcpm_port_proc_read_device, PDE_DATA(inode));
+}
+
+void xcpm_port_add_proc_entry(struct xscore_port *port)
+{
+ struct proc_dir_entry *file;
+ char name[32];
+
+ if (test_and_set_bit(XSCORE_PORT_PROCFS_CREATED, &port->flags))
+ return;
+
+ sprintf(name, "%llx", port->guid);
+
+ file = proc_create_data(name, S_IFREG, proc_root_xcpm_ports,
+ &xcpm_port_proc_fops, port);
+ if (!file)
+ pr_err("unable to create /proc/driver/xscore/xcpm/ports/%s.\n", name);
+}
+
+void xcpm_port_remove_proc_entry(struct xscore_port *port)
+{
+ char name[32];
+
+ sprintf(name, "%llx", port->guid);
+ remove_proc_entry(name, proc_root_xcpm_ports);
+ clear_bit(XSCORE_PORT_PROCFS_CREATED, &port->flags);
+}
+
+static ssize_t xcpm_xsmp_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ struct xsmp_ctx *ctx;
+ int action, ret;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ /* Bound the copy to one page and NUL-terminate for kstrtoint() */
+ if (count == 0 || count >= PAGE_SIZE ||
+ copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ /* Do not clobber file->private_data; single_open() owns it */
+ ctx = PDE_DATA(file_inode(file));
+
+ ret = kstrtoint(buf, 0, &action);
+ if (ret)
+ goto out;
+ switch (action) {
+ case 0: /* Clear counters */
+ memset(ctx->counters, 0, sizeof(ctx->counters));
+ break;
+ case 4567:
+ pr_err("XSMP is shutdown by user %s : %s (0x%llx)\n",
+ ctx->session_name, ctx->chassis_name, ctx->dguid);
+ set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+ break;
+ default:
+ break;
+ }
+ free_page((unsigned long)buf);
+ return count;
+out:
+ free_page((unsigned long)buf);
+ return -EINVAL;
+}
+
+static int xcpm_xsmp_proc_read_device(struct seq_file *m, void *data)
+{
+ struct xsmp_ctx *xsmp_ctx = (struct xsmp_ctx *)m->private;
+ char *state_str = NULL;
+ unsigned long tsecs = 0, tmins = 0, thrs = 0;
+ char tmp_buf[256];
+
+ if (xsmp_ctx->state == XSMP_SESSION_CONNECTED)
+ state_str = "Up";
+ else
+ state_str = "Down";
+
+ seq_printf(m, "State:\t\t\t\t%s\n", state_str);
+ seq_printf(m, "Hello interval (secs):\t\t%d\n",
+ xsmp_ctx->hello_timeout / (3 * HZ));
+ seq_printf(m, "Session timeout (secs):\t\t%d\n",
+ xsmp_ctx->hello_timeout / HZ);
+ seq_printf(m, "Datapath timeout (secs):\t%d\n",
+ xsmp_ctx->datapath_timeout);
+
+ seq_printf(m, "CA Device Name:\t\t\t%s\n",
+ xsmp_ctx->port->xs_dev->device->name);
+ seq_printf(m, "Local port:\t\t\t%d\n", (int)xsmp_ctx->port->port_num);
+ seq_printf(m, "Local lid:\t\t\t%d\n", (int)xsmp_ctx->port->lid);
+ seq_printf(m, "Local guid:\t\t\t0x%Lx\n", xsmp_ctx->port->guid);
+ seq_printf(m, "Remote lid:\t\t\t%d\n", xsmp_ctx->dlid);
+ seq_printf(m, "Remote guid:\t\t\t0x%Lx\n", xsmp_ctx->dguid);
+
+ seq_printf(m, "Chassis's xcpm version:\t\t%x\n",
+ xsmp_ctx->xsigo_xsmp_version);
+ seq_printf(m, "Chassis Name:\t\t\t%s\n", xsmp_ctx->chassis_name);
+ seq_printf(m, "Server-Profile Name:\t\t%s\n", xsmp_ctx->session_name);
+
+ seq_puts(m, "\n");
+ seq_printf(m, "Port Link Layer:\t\t%s\n",
+ port_linkLayer2str[xsmp_ctx->port->link_layer]);
+ seq_puts(m, "\n");
+
+ if (xsmp_ctx->state == XSMP_SESSION_CONNECTED) {
+ int lqpn, dqpn;
+
+ lqpn = xsmp_ctx->conn_ctx.local_qpn;
+ dqpn = xsmp_ctx->conn_ctx.remote_qpn;
+
+ calc_time_fjiffies(xsmp_ctx->jiffies, &tsecs, &tmins, &thrs);
+ seq_printf(m, "QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+ lqpn, lqpn, dqpn, dqpn);
+ }
+
+ tmp_buf[0] = 0;
+ if (test_bit(XSMP_REG_SENT, &xsmp_ctx->flags))
+ strcat(tmp_buf, "XSMP Reg Sent");
+ else
+ strcat(tmp_buf, "XSMP Reg Not Sent");
+ if (test_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags))
+ strcat(tmp_buf, " + XSMP Reg Conf Rcvd");
+ else
+ strcat(tmp_buf, " + XSMP Reg Conf Not Rcvd");
+
+ if (test_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags))
+ strcat(tmp_buf, " + IB Link Down");
+
+ if (xsmp_ctx->conn_ctx.features & XSCORE_USE_CHECKSUM)
+ strcat(tmp_buf, " + Checksum Mode");
+ else
+ strcat(tmp_buf, " + ICRC Mode");
+
+ seq_printf(m, "%s\n\n", tmp_buf);
+
+ seq_printf(m, "Session Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+ thrs, tmins, tsecs);
+
+ calc_time_fjiffies(xsmp_ctx->hello_jiffies, &tsecs, &tmins, &thrs);
+ seq_printf(m, "Last Hello received :\t\t%lu hrs %lu mins %lu seconds\n",
+ thrs, tmins, tsecs);
+ seq_printf(m, "Number of session timeouts:\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_TIMEOUT_COUNTER]);
+ seq_printf(m, "Reg Sent Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_REG_SENT_COUNTER]);
+ seq_printf(m, "Resource List Sent Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_RES_LIST_COUNTER]);
+ seq_printf(m, "Reg Confirm Rcvd Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_REG_CONF_COUNTER]);
+ seq_printf(m, "Rej Rcvd Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_REJ_RCVD_COUNTER]);
+ seq_printf(m, "Shutdown Rcvd Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_SHUTDOWN_RCVD_COUNTER]);
+ seq_printf(m, "XVE Type Rcvd Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_XVE_MESSAGE_COUNTER]);
+ seq_printf(m, "VNIC Type Rcvd Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_VNIC_MESSAGE_COUNTER]);
+ seq_printf(m, "VHBA Type Rcvd Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_VHBA_MESSAGE_COUNTER]);
+ seq_printf(m, "USPACE Type Rcvd Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_USPACE_MESSAGE_COUNTER]);
+ seq_printf(m, "SESSION Type Rcvd Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]);
+ seq_printf(m, "VHBA Type Sent Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_VHBA_MESSAGE_SENT_COUNTER]);
+ seq_printf(m, "VNIC Type Sent Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_VNIC_MESSAGE_SENT_COUNTER]);
+ seq_printf(m, "USPACE Type Sent Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_USPACE_MESSAGE_SENT_COUNTER]);
+ seq_printf(m, "SESSION Type Sent Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_MESSAGE_SENT_COUNTER]);
+ seq_printf(m, "Hello recv count:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_HELLO_RCVD_COUNTER]);
+ seq_printf(m, "Hello recv(INTERRUPT_MODE):\t%d\n",
+ xsmp_ctx->counters[XSMP_HELLO_INTERRUPT_COUNTER]);
+ seq_printf(m, "Hello send count:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_HELLO_SENT_COUNTER]);
+ seq_printf(m, "Seq Number Mismatch Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_SEQ_MISMATCH_COUNTER]);
+ seq_printf(m, "Ring Full Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_RING_FULL_COUNTER]);
+ seq_printf(m, "Send Error Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_SEND_ERROR_COUNTER]);
+ seq_printf(m, "Conn Down Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_SESSION_CONN_DOWN_COUNTER]);
+ seq_printf(m, "Total XSMP msg Counter:\t\t%d\n",
+ xsmp_ctx->counters[XSMP_TOTAL_MSG_SENT_COUNTER]);
+ seq_printf(m, "Session Conn Retry Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_CONN_RETRY_COUNTER]);
+ seq_printf(m, "Session Conn Failed Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_CONN_FAILED_COUNTER]);
+ seq_printf(m, "Session Conn Success Counter:\t%d\n",
+ xsmp_ctx->counters[XSMP_CONN_SUCCESS_COUNTER]);
+ return 0;
+}
+
+static int xcpm_xsmp_proc_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, xcpm_xsmp_proc_read_device, PDE_DATA(inode));
+}
+
+void xcpm_xsmp_add_proc_entry(struct xsmp_ctx *xsmp_ctx)
+{
+ struct proc_dir_entry *file;
+ char name[32];
+
+ sprintf(name, "%d", xsmp_ctx->idr);
+
+ file = proc_create_data(name, S_IFREG, proc_root_xcpm_links,
+ &xcpm_xsmp_proc_fops, xsmp_ctx);
+ if (!file)
+ pr_err("Unable to create /proc/driver/xscore/xcpm/links/%s.\n", name);
+}
+
+void xcpm_xsmp_remove_proc_entry(struct xsmp_ctx *xsmp_ctx)
+{
+ char name[32];
+
+ sprintf(name, "%d", xsmp_ctx->idr);
+ remove_proc_entry(name, proc_root_xcpm_links);
+}
+
+static ssize_t xscore_proc_write_systemid(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ if (count == 0 || count >= PAGE_SIZE ||
+ copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ memcpy(system_id_str, buf, count);
+ /* Strip the trailing newline, if any */
+ if (system_id_str[count - 1] == '\n')
+ system_id_str[count - 1] = 0;
+ else
+ system_id_str[count] = 0;
+ free_page((unsigned long)buf);
+ return count;
+out:
+ free_page((unsigned long)buf);
+ return -EINVAL;
+}
+
+static int xscore_proc_read_systemid(struct seq_file *m, void *data)
+{
+ if (system_id_str[0])
+ seq_printf(m, "system_id:\t\t\t%s\n", system_id_str);
+ else
+ seq_puts(m, "system_id:\t\t\t<NULL>\n");
+ return 0;
+}
+
+static int xscore_proc_open_systemid(struct inode *inode, struct file *file)
+{
+ return single_open(file, xscore_proc_read_systemid, PDE_DATA(inode));
+}
+
+static ssize_t xscore_proc_write_info(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ int cc = count > XSIGO_MAX_HOSTNAME ? XSIGO_MAX_HOSTNAME : count;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ if (cc == 0 || copy_from_user(buf, buffer, cc))
+ goto out;
+ buf[cc] = '\0';
+
+ memcpy(hostname_str, buf, cc);
+ /*
+ * The last character is usually a newline; overwrite it
+ */
+ if (hostname_str[cc - 1] == '\n')
+ hostname_str[cc - 1] = 0;
+ else
+ hostname_str[cc] = 0;
+ free_page((unsigned long)buf);
+ return count;
+out:
+ free_page((unsigned long)buf);
+ return -EINVAL;
+}
+
+static int xscore_proc_read_info(struct seq_file *m, void *data)
+{
+ seq_printf(m, "ULP services mask:\t\t0x%x\n", xcpm_resource_flags);
+ seq_printf(m, "Boot_flag:\t\t\t%d\n", boot_flag);
+ if (system_id_str[0])
+ seq_printf(m, "system_id:\t\t\t%s\n", system_id_str);
+ else
+ seq_puts(m, "system_id:\t\t\t<NULL>\n");
+ /* Print the hostname directly to avoid truncating it */
+ seq_printf(m, "HostName:\t\t\t%s\n", hostname_str);
+ if (os_version)
+ seq_printf(m, "OS version:\t\t\t%s\n", os_version);
+ if (os_arch)
+ seq_printf(m, "OS Arch:\t\t\t%s\n", os_arch);
+ return 0;
+}
+
+static int xscore_proc_open_info(struct inode *inode, struct file *file)
+{
+ return single_open(file, xscore_proc_read_info, PDE_DATA(inode));
+}
+
+static int xscore_proc_read_debug(struct seq_file *m, void *data)
+{
+ unsigned long tsecs = 0, tmins = 0, thrs = 0;
+
+ calc_time_fjiffies(xscore_wq_jiffies, &tsecs, &tmins, &thrs);
+
+ seq_printf(m, "Total wait time(secs): %ld\n", (xscore_wait_time / HZ));
+ seq_printf(m, "Debug Bit mask : 0x%x\n", xscore_debug);
+ seq_printf(m, "Force sm change : 0x%x\n", xscore_force_sm_change);
+ seq_printf(m, "Workqueue state : 0x%lx\n", xscore_wq_state);
+ seq_printf(m, "Last WQ(%lx) trigger time :\t%lu hrs",
+ xscore_last_wq, thrs);
+ seq_printf(m, "Last WQ : %lu mins %lu seconds\n", tmins, tsecs);
+
+ return 0;
+}
+
+static ssize_t xscore_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ int ret;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ if (count == 0 || count >= PAGE_SIZE ||
+ copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ /* The single written value is applied to both debug knobs */
+ ret = kstrtoint(buf, 0, &xscore_debug);
+ if (ret)
+ goto out;
+ ret = kstrtoint(buf, 0, &xscore_force_sm_change);
+ if (ret)
+ goto out;
+ free_page((unsigned long)buf);
+ return count;
+out:
+ free_page((unsigned long)buf);
+ return -EINVAL;
+}
+
+static int xscore_proc_open_debug(struct inode *inode, struct file *file)
+{
+ return single_open(file, xscore_proc_read_debug, PDE_DATA(inode));
+}
+
+int xscore_create_procfs_entries(void)
+{
+ int ret = 0;
+ struct proc_dir_entry *file_d;
+
+ proc_root_xscore = proc_mkdir("driver/xscore", NULL);
+ if (!proc_root_xscore) {
+ pr_err("Unable to create /proc/driver/xscore\n");
+ return -ENOMEM;
+ }
+ file_d = proc_create_data("debug", S_IFREG, proc_root_xscore,
+ &xscore_debug_proc_fops, NULL);
+ if (!file_d) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/debug\n");
+ ret = -ENOMEM;
+ goto no_debug;
+ }
+
+ file_d = proc_create_data("info", S_IFREG, proc_root_xscore,
+ &xscore_info_proc_fops, NULL);
+ if (!file_d) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/info\n");
+ ret = -ENOMEM;
+ goto no_info;
+ }
+
+ file_d = proc_create_data("systemid", S_IFREG, proc_root_xscore,
+ &xscore_systemid_proc_fops, NULL);
+ if (!file_d) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/systermid\n");
+ ret = -ENOMEM;
+ goto no_systemid;
+ }
+
+ proc_root_xcpm = proc_mkdir("xcpm", proc_root_xscore);
+ if (!proc_root_xcpm) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/xcpm\n");
+ ret = -ENOMEM;
+ goto no_xcpm;
+ }
+
+ proc_root_xcpm_links = proc_mkdir("links", proc_root_xcpm);
+ if (!proc_root_xcpm_links) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/xcpm/links\n");
+ ret = -ENOMEM;
+ goto no_links;
+ }
+ proc_root_xcpm_ports = proc_mkdir("ports", proc_root_xcpm);
+ if (!proc_root_xcpm_ports) {
+ pr_err(PFX
+ "Unable to create /proc/driver/xscore/xcpm/ports\n");
+ ret = -ENOMEM;
+ goto no_ports;
+ }
+ return 0;
+
+no_ports:
+ remove_proc_entry("links", proc_root_xcpm);
+no_links:
+ remove_proc_entry("xcpm", proc_root_xscore);
+no_xcpm:
+ remove_proc_entry("systemid", proc_root_xscore);
+no_systemid:
+ remove_proc_entry("info", proc_root_xscore);
+no_info:
+ remove_proc_entry("debug", proc_root_xscore);
+no_debug:
+ remove_proc_entry("driver/xscore", NULL);
+ return ret;
+}
+
+void xscore_remove_procfs_entries(void)
+{
+ remove_proc_entry("ports", proc_root_xcpm);
+ remove_proc_entry("links", proc_root_xcpm);
+ remove_proc_entry("xcpm", proc_root_xscore);
+ remove_proc_entry("systemid", proc_root_xscore);
+ remove_proc_entry("info", proc_root_xscore);
+ remove_proc_entry("debug", proc_root_xscore);
+ remove_proc_entry("driver/xscore", NULL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the USPACE protocol (the channel to the
+ * user-space kxsigod daemon)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+
+#include "xscore_priv.h"
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "xscore.h"
+
+#define PFX "UADM"
+
+static dev_t xscore_devt;
+static struct cdev xscore_cdev;
+static struct list_head read_list;
+static int xscore_svc_id = -1;
+struct mutex mut_lock;
+static unsigned long xscore_uadm_flags;
+static atomic_t list_count;
+static struct class *uadm_class;
+static DECLARE_WAIT_QUEUE_HEAD(read_wait);
+
+#define XSCORE_UADM_OPEN 0x1
+
+#define XSCORE_UADM_MAX_MSGS 256
+
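+/*
+ * Every message exchanged with the user-space daemon is prefixed with
+ * this header; the opcode distinguishes chassis traffic from the
+ * registration request.
+ */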
+struct xscore_uadm_hdr {
+ u8 opcode;
+ int flags;
+ void *xsmp_hndl;
+};
+
+enum {
+ XSCORE_UADM_CHASSIS_MSG = 1,
+ XSCORE_UADM_REG_MSG,
+};
+
+struct xscore_uadm_msg {
+ struct list_head list;
+ struct xscore_uadm_hdr hdr;
+ void *msg;
+ int len;
+};
+
+/*
+ * Called from thread context
+ */
+void xscore_uadm_receive(void *xsmp_hndl, u8 *data, int len)
+{
+ struct xscore_uadm_msg *msg;
+ int err = 0;
+
+ mutex_lock(&mut_lock);
+ if (!xsigod_enable) {
+ err++;
+ goto out;
+ }
+ if (atomic_read(&list_count) > XSCORE_UADM_MAX_MSGS) {
+ UADM_ERROR("%s: receive Q full, dropping packet\n", __func__);
+ err++;
+ goto out;
+ }
+ msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+ if (!msg) {
+ err++;
+ goto out;
+ }
+ msg->msg = data;
+ msg->hdr.xsmp_hndl = xsmp_hndl;
+ msg->hdr.flags = 0;
+ msg->hdr.opcode = XSCORE_UADM_CHASSIS_MSG;
+ msg->len = len;
+ list_add_tail(&msg->list, &read_list);
+ atomic_inc(&list_count);
+ wake_up_interruptible(&read_wait);
+out:
+ if (err)
+ kfree(data);
+ mutex_unlock(&mut_lock);
+}
+
+/*
+ * Called from thread context
+ */
+static void xscore_event_handler(void *xsmp_hndl, int event)
+{
+ mutex_lock(&mut_lock);
+ switch (event) {
+ default:
+ break;
+ }
+ mutex_unlock(&mut_lock);
+}
+
+static int xscore_uadm_register(void)
+{
+ struct xsmp_service_reg_info sinfo = {
+ .receive_handler = xscore_uadm_receive,
+ .event_handler = xscore_event_handler,
+ .ctrl_message_type = XSMP_MESSAGE_TYPE_USPACE,
+ .resource_flag_index = RESOURCE_FLAG_INDEX_USPACE
+ };
+ int ret = 0;
+
+ UADM_FUNCTION("%s:\n", __func__);
+ xscore_svc_id = xcpm_register_service(&sinfo);
+ if (xscore_svc_id < 0) {
+ UADM_ERROR("%s: xcpm_register_service failed %d\n",
+ __func__, xscore_svc_id);
+ clear_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags);
+ return -ENODEV;
+ }
+
+ UADM_INFO("%s: Successful\n", __func__);
+ return ret;
+}
+
+static int xscore_uadm_open(struct inode *inode, struct file *file)
+{
+ int ret = 0;
+
+ if (test_and_set_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags)) {
+ UADM_ERROR("%s: Already open\n", __func__);
+ ret = -EBUSY;
+ }
+ return ret;
+}
+
+static int xscore_uadm_release(struct inode *inode, struct file *file)
+{
+ struct xscore_uadm_msg *msg, *tmsg;
+
+ mutex_lock(&mut_lock);
+ /* Unregister the service only if user space actually registered it */
+ if (xscore_svc_id >= 0) {
+ xcpm_unregister_service(xscore_svc_id);
+ xscore_svc_id = -1;
+ }
+ list_for_each_entry_safe(msg, tmsg, &read_list, list) {
+ list_del(&msg->list);
+ kfree(msg->msg);
+ kfree(msg);
+ }
+ clear_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags);
+ mutex_unlock(&mut_lock);
+ UADM_INFO("%s: Successful\n", __func__);
+ return 0;
+}
+
+static unsigned int xscore_uadm_poll(struct file *file, poll_table *wait)
+{
+ unsigned int pollflags = 0;
+
+ poll_wait(file, &read_wait, wait);
+ mutex_lock(&mut_lock);
+ if (!list_empty(&read_list))
+ pollflags = POLLIN | POLLRDNORM;
+ mutex_unlock(&mut_lock);
+ return pollflags;
+}
+
+#define HDR_LEN (sizeof(struct xscore_uadm_hdr))
+
+/*
+ * Make it a blocking call later XXX
+ */
+static ssize_t xscore_uadm_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ int ret;
+ struct xscore_uadm_msg *msg;
+
+ /* The caller must provide room for at least the UADM header */
+ if (count < HDR_LEN)
+ return -EINVAL;
+
+ mutex_lock(&mut_lock);
+ if (list_empty(&read_list)) {
+ ret = -ENODATA;
+ goto out;
+ }
+ msg = list_entry(read_list.next, struct xscore_uadm_msg, list);
+ list_del(&msg->list);
+ atomic_dec(&list_count);
+ ret = msg->len > (count - HDR_LEN) ? (count - HDR_LEN) : msg->len;
+ if (copy_to_user(buf, &msg->hdr, HDR_LEN) ||
+ copy_to_user(buf + HDR_LEN, msg->msg, ret))
+ ret = -EFAULT;
+ *ppos += (ret + HDR_LEN);
+ kfree(msg->msg);
+ kfree(msg);
+out:
+ mutex_unlock(&mut_lock);
+ return ret;
+}
+
+static ssize_t xscore_uadm_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ void *msg = NULL;
+ int len;
+ struct xscore_uadm_hdr hdr;
+ int ret;
+
+ /* Every write must carry at least the UADM header */
+ if (count < HDR_LEN)
+ return -EINVAL;
+
+ len = count - HDR_LEN;
+ if (len) {
+ msg = kmalloc(len, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+ }
+ mutex_lock(&mut_lock);
+ if (copy_from_user(&hdr, buf, HDR_LEN) ||
+ (len && copy_from_user(msg, buf + HDR_LEN, len))) {
+ UADM_ERROR("%s: copy_from_user error\n", __func__);
+ ret = -EFAULT;
+ if (msg != NULL)
+ kfree(msg);
+ goto out;
+ }
+ /*
+ * Check type of command and handle it accordingly
+ */
+ switch (hdr.opcode) {
+ case XSCORE_UADM_REG_MSG:
+ if (xscore_uadm_register())
+ ret = -EBUSY;
+ else {
+ ret = count;
+ *ppos += count;
+ }
+ goto out;
+ default:
+ break;
+ }
+
+ ret = xcpm_send_message(hdr.xsmp_hndl, xscore_svc_id, msg, len);
+ if (ret) {
+ UADM_ERROR("%s: xcpm_send_message error %d sess hndl: %p\n",
+ __func__, ret, hdr.xsmp_hndl);
+ ret = -EINVAL;
+ if (msg != NULL)
+ kfree(msg);
+ goto out;
+ }
+ ret = count;
+ *ppos += count;
+out:
+ mutex_unlock(&mut_lock);
+ return ret;
+}
+
+static const struct file_operations xscore_fops = {
+ .open = xscore_uadm_open,
+ .release = xscore_uadm_release,
+ .read = xscore_uadm_read,
+ .write = xscore_uadm_write,
+ .poll = xscore_uadm_poll,
+ .owner = THIS_MODULE,
+};
+
+void xscore_uadm_destroy(void)
+{
+ device_destroy(uadm_class,
+ MKDEV(MAJOR(xscore_devt), MINOR(xscore_devt)));
+ class_destroy(uadm_class);
+ cdev_del(&xscore_cdev);
+ unregister_chrdev_region(xscore_devt, 1);
+ mutex_destroy(&mut_lock);
+}
+
+int xscore_uadm_init(void)
+{
+ int result;
+
+ INIT_LIST_HEAD(&read_list);
+ mutex_init(&mut_lock);
+
+ result = alloc_chrdev_region(&xscore_devt, 0, 1, "kxsigod");
+ if (result) {
+ UADM_ERROR("%s: alloc_chrdev_region error %d\n", __func__,
+ result);
+ mutex_destroy(&mut_lock);
+ return result;
+ }
+
+ cdev_init(&xscore_cdev, &xscore_fops);
+
+ result = cdev_add(&xscore_cdev, xscore_devt, 1);
+ if (result) {
+ UADM_ERROR("%s: cdev_add error %d\n", __func__, result);
+ unregister_chrdev_region(xscore_devt, 1);
+ mutex_destroy(&mut_lock);
+ return result;
+ }
+ uadm_class = class_create(THIS_MODULE, "kxsigod");
+ if (IS_ERR(uadm_class)) {
+ result = PTR_ERR(uadm_class);
+ UADM_ERROR("%s: class_create error %d\n", __func__, result);
+ cdev_del(&xscore_cdev);
+ unregister_chrdev_region(xscore_devt, 1);
+ mutex_destroy(&mut_lock);
+ return result;
+ }
+ device_create(uadm_class, NULL,
+ MKDEV(MAJOR(xscore_devt), MINOR(xscore_devt)), NULL,
+ "kxsigod");
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+#include <linux/syscalls.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include "xs_compat.h"
+#include "xscore.h"
+
+int xscore_vpci_enable = 1;
+module_param(xscore_vpci_enable, int, 0644);
+
+#define PCI_VENDOR_ID_XSIGO 0x199d
+#define PCI_DEVICE_ID_XSIGO_VNIC 0x8209
+
+static struct pci_bus *vbus;
+static struct pci_sysdata *sysdata;
+
+static DEFINE_PCI_DEVICE_TABLE(xs_vpci_dev_table) = {
+ {PCI_DEVICE(PCI_VENDOR_ID_XSIGO, PCI_DEVICE_ID_XSIGO_VNIC)},
+ {0}
+};
+
+MODULE_DEVICE_TABLE(pci, xs_vpci_dev_table);
+
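+/*
+ * Minimal PCI config-space read emulation for the virtual NIC devices;
+ * only the fields the PCI core probes are reported.
+ */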
+int xs_vpci_read(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 *val)
+{
+ switch (where) {
+ case PCI_VENDOR_ID:
+ *val = PCI_VENDOR_ID_XSIGO | PCI_DEVICE_ID_XSIGO_VNIC << 16;
+ /* our id */
+ break;
+ case PCI_COMMAND:
+ *val = 0;
+ break;
+ case PCI_HEADER_TYPE:
+ *val = PCI_HEADER_TYPE_NORMAL;
+ break;
+ case PCI_STATUS:
+ *val = 0;
+ break;
+ case PCI_CLASS_REVISION:
+ *val = (2 << 24) | (0 << 16) | 1;
+ /* network class, ethernet controller, revision 1 */
+ break;
+ case PCI_INTERRUPT_PIN:
+ *val = 0;
+ break;
+ case PCI_SUBSYSTEM_VENDOR_ID:
+ *val = 0;
+ break;
+ case PCI_SUBSYSTEM_ID:
+ *val = 0;
+ break;
+ default:
+ *val = 0;
+ /* sensible default */
+ }
+ return 0;
+}
+
+int xs_vpci_write(struct pci_bus *bus, unsigned int devfn, int where,
+ int size, u32 val)
+{
+ switch (where) {
+ case PCI_BASE_ADDRESS_0:
+ case PCI_BASE_ADDRESS_1:
+ case PCI_BASE_ADDRESS_2:
+ case PCI_BASE_ADDRESS_3:
+ case PCI_BASE_ADDRESS_4:
+ case PCI_BASE_ADDRESS_5:
+ break;
+ }
+ return 0;
+}
+
+struct pci_ops xs_vpci_ops = {
+ .read = xs_vpci_read,
+ .write = xs_vpci_write
+};
+
+struct pci_dev *xs_vpci_prep_vnic(struct net_device *netdev, char *vnic_name,
+ int devn)
+{
+ struct pci_dev *pcidev = NULL;
+ /*
+ * netdev->ifindex is always zero on RHEL5 kernels before the netdev
+ * is registered.
+ */
+
+ if (!boot_flag || vbus == NULL)
+ return NULL;
+
+ pcidev = pci_scan_single_device(vbus, devn);
+
+ if (pcidev == NULL)
+ return NULL;
+
+ pci_dev_get(pcidev);
+
+ pci_bus_add_devices(vbus);
+ SET_NETDEV_DEV(netdev, &pcidev->dev);
+ return pcidev;
+}
+EXPORT_SYMBOL(xs_vpci_prep_vnic);
+
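+/*
+ * Instantiate a virtual PCI device for the named vnic and link it to
+ * the net_device through a "device" sysfs symlink.
+ */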
+void *xs_vpci_add_vnic(char *vnic_name, int devn)
+{
+ struct pci_dev *pcidev;
+ struct net_device *netdev;
+ int ret;
+
+ if (vbus == NULL)
+ return NULL;
+ pcidev = pci_scan_single_device(vbus, devn);
+ if (pcidev == NULL)
+ return NULL;
+
+ pci_dev_get(pcidev);
+ /*
+ * Better to use the compat layer, but since this path is Citrix
+ * specific for now, rely on the LINUX version magic directly.
+ */
+ netdev = dev_get_by_name(&init_net, vnic_name);
+ if (netdev == NULL) {
+ pci_dev_put(pcidev);
+ return NULL;
+ }
+ pci_bus_add_device(pcidev);
+
+ ret = sysfs_create_link(&netdev->dev.kobj, &pcidev->dev.kobj, "device");
+ if (ret) {
+ pci_stop_and_remove_bus_device(pcidev);
+ dev_put(netdev);
+ pci_dev_put(pcidev);
+ pcidev = NULL;
+ }
+ return pcidev;
+}
+EXPORT_SYMBOL(xs_vpci_add_vnic);
+
+void xs_vpci_remove_vnic(struct net_device *netdev, void *hndl)
+{
+ struct pci_dev *pcidev = hndl;
+
+ if (vbus == NULL)
+ return;
+ if (!boot_flag) {
+ sysfs_remove_link(&netdev->dev.kobj, "device");
+ dev_put(netdev);
+ }
+ pci_stop_and_remove_bus_device(pcidev);
+ pci_dev_put(pcidev);
+}
+EXPORT_SYMBOL(xs_vpci_remove_vnic);
+
+void xs_vpci_vdev_remove(struct pci_dev *dev)
+{
+}
+
+static struct pci_driver xs_vpci_vdev_driver = {
+ .name = "Xsigo-Virtual-NIC",
+ .id_table = xs_vpci_dev_table,
+ .remove = xs_vpci_vdev_remove
+};
+
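+/*
+ * Create the virtual PCI bus (trying bus numbers 100 downwards until
+ * one is free) and register the virtual NIC driver on it.
+ */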
+int xs_vpci_bus_init(void)
+{
+ int i = 100;
+
+ if (!xscore_vpci_enable)
+ return 0;
+
+ sysdata = kzalloc(sizeof(*sysdata), GFP_KERNEL);
+ if (!sysdata)
+ return -ENOMEM;
+ while (i > 0) {
+ vbus = pci_scan_bus_parented(NULL, i, &xs_vpci_ops, sysdata);
+ if (vbus != NULL)
+ break;
+ memset(sysdata, 0, sizeof(*sysdata));
+ i--;
+ }
+ if (vbus == NULL) {
+ kfree(sysdata);
+ return -EINVAL;
+ }
+ if (pci_register_driver(&xs_vpci_vdev_driver) < 0) {
+ pci_remove_bus(vbus);
+ vbus = NULL;
+ kfree(sysdata);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+void xs_vpci_bus_remove(void)
+{
+ if (vbus) {
+ pci_unregister_driver(&xs_vpci_vdev_driver);
+ device_unregister(vbus->bridge);
+ pci_remove_bus(vbus);
+ kfree(sysdata);
+ vbus = NULL;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSCORE_XDS_H__
+#define __XSCORE_XDS_H__
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+
+#define XCM_REC_VERSION 1
+#define MAX_XCFM_COUNT 8
+
+#define XSIGO_MGMT_CLASS 0x0B
+#define XSIGO_MGMT_CLASS_VERSION 0x02
+
+#define IB_MAD_ATTR_XCM_REQUEST 0xB002
+
+#define XSIGO_MGMT_METHOD_GET IB_MGMT_METHOD_GET
+#define XSIGO_MGMT_METHOD_SET IB_MGMT_METHOD_SET
+
+#define XSIGO_MAX_HOSTNAME 65
+#define XSIGO_MAX_OS_VERSION_LEN 32
+#define XSIGO_MAX_OS_ARCH_LEN 16
+#define XSIGO_MAX_BUILD_VER_LEN 16
+
+struct xcfm_record {
+ u64 port_id;
+ u16 xcm_lid; /* lid of the XCM port */
+ u8 reserved[10];
+} __packed;
+
+struct xcm_list {
+ u8 count;
+ u8 xcm_version;
+ u8 reserved[2];
+ struct xcfm_record xcms[MAX_XCFM_COUNT];
+};
+
+struct server_info {
+ u32 vm_id;
+ u64 port_id;
+} __packed;
+
+struct xds_request {
+ struct server_info server_record;
+ char hostname[XSIGO_MAX_HOSTNAME];
+ char os_version[XSIGO_MAX_OS_VERSION_LEN];
+ char os_arch[XSIGO_MAX_OS_ARCH_LEN];
+ uint32_t os_type;
+ uint64_t fw_version;
+ uint32_t hw_version;
+ uint32_t driver_version;
+ uint64_t system_id_l;
+ uint64_t system_id_h;
+ uint32_t reserved; /* For sending capablilties */
+ char build_version[XSIGO_MAX_BUILD_VER_LEN];
+} __packed;
+
+struct ib_xds_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[IB_MGMT_SA_HDR - IB_MGMT_MAD_HDR];
+ u8 data[IB_MGMT_SA_DATA];
+} __packed;
+
+/* Discovery solicitation packet.
+ * Sent by server as mcast request to all chassis. (xds_request)
+ * Sent by chassis as unicast response to server. (xcm_rsp_msg_t)
+ */
+#define XDP_MSG_TYPE_DISC_SOL 0x1
+
+#define XDP_FLAGS_REQ 0x1
+#define XDP_FLAGS_RSP 0x2
+
+struct xdp_hdr {
+ uint16_t type;
+ uint16_t len;
+ uint16_t flags;
+ uint32_t reserved1;
+ uint32_t reserved2;
+ uint32_t chksum;
+} __packed;
+
+struct xdds_disc_req {
+ struct xdp_hdr xhdr;
+ struct xds_request req;
+} __packed;
+
+struct xdp_info {
+#define XDP_FABRIC_MTU_1K 0
+#define XDP_FABRIC_MTU_2K 1
+#define XDP_FABRIC_MTU_4K 2
+ uint8_t fabric_mtu;
+ uint8_t xsmp_vlan;
+ uint8_t xsmp_cos;
+ uint8_t resv1;
+ uint32_t reserved[63];
+} __packed;
+
+struct xdds_work {
+ struct work_struct work;
+ u8 *msg;
+ int msg_len;
+ struct xscore_port *port;
+};
+
+#endif /*__XSCORE_XDS_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the XSMP protocol
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include "xscore_priv.h"
+#include "xscore.h"
+#include "xs_versions.h"
+#include "xsmp.h"
+#include "xs_compat.h"
+
+#define MAX_XSMP_MSG_SIZE 1024
+
+#define XSMP_SERVICE_ID 0x02139701
+
+#define QUEUE_CONN_DELAY (1000 * 10)
+
+struct xsmp_work {
+ struct work_struct work;
+ struct xsmp_ctx *xsmp_ctx;
+ void *msg;
+ int len;
+ int status;
+};
+
+static struct list_head gxsmp_list;
+static struct idr xsmp_id_table;
+static spinlock_t xsmp_glob_lock;
+u32 xcpm_resource_flags;
+unsigned long xscore_wait_time;
+/*
+ * This mutex protects the xcpm_services registration table
+ */
+struct mutex svc_mutex;
+struct mutex xsmp_mutex;
+
+/*
+ * xscore_wait_in_boot will be the one which controls vnics,vhbas wait also
+ * Disable this in ESX , OVM , CITRIX ......
+ */
+int boot_flag = 1;
+int xscore_wait_in_boot = 1;
+module_param(boot_flag, int, 0444);
+module_param(xscore_wait_in_boot, int, 0644);
+
+int xscore_handle_hello_msg;
+module_param(xscore_handle_hello_msg, int, 0444);
+
+int xsigod_enable;
+module_param(xsigod_enable, int, 0444);
+
+static int xsmp_ring_size = 256;
+module_param(xsmp_ring_size, int, 0644);
+static int xscore_sess_wait_time = 600;
+module_param(xscore_sess_wait_time, int, 0644);
+
+#define MAX_NUM_SVCS XSMP_MESSAGE_TYPE_MAX
+
+static struct xsmp_service_reg_info xcpm_services[MAX_NUM_SVCS];
+
+static void xsmp_cleanup_session(struct xsmp_ctx *ctx);
+static int xsmp_session_create(struct xscore_port *port, u64 dguid, u16 dlid);
+static int xsmp_send_resource_list(struct xsmp_ctx *ctx, u32 rflags);
+static int xsmp_sess_disconnect(struct xsmp_ctx *xsmp_ctx);
+static void notify_ulp(struct xsmp_ctx *ctx, int evt);
+
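+/*
+ * Look up an XSMP session by its idr handle and take a reference on it.
+ * The caller must drop the reference with xsmp_put_ctx().
+ */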
+static struct xsmp_ctx *xsmp_get_ctx(void *cookie)
+{
+ int idr = (int)(unsigned long)cookie;
+ struct xsmp_ctx *ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xsmp_glob_lock, flags);
+ ctx = idr_find(&xsmp_id_table, idr);
+ if (!ctx) {
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ return NULL;
+ }
+ /*
+ * Increment reference count
+ */
+ atomic_inc(&ctx->ref_cnt);
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ return ctx;
+}
+
+static void xsmp_put_ctx(struct xsmp_ctx *ctx)
+{
+ atomic_dec(&ctx->ref_cnt);
+}
+
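+/*
+ * Propagate an IB port up/down event to every XSMP session on the port
+ * and notify the registered ULPs.
+ */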
+void xsmp_ulp_notify(struct xscore_port *port, int port_up)
+{
+ struct xsmp_ctx *xsmp_ctx;
+
+ mutex_lock(&xsmp_mutex);
+ list_for_each_entry(xsmp_ctx, &port->xsmp_list, list) {
+ if (port_up)
+ clear_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags);
+ else {
+ set_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags);
+ clear_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+ clear_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags);
+ xsmp_ctx->state = XSMP_SESSION_ERROR;
+ }
+ notify_ulp(xsmp_ctx,
+ port_up ? XSCORE_PORT_UP : XSCORE_PORT_DOWN);
+ }
+ mutex_unlock(&xsmp_mutex);
+}
+
+void xsmp_allocate_xsmp_session(struct xscore_port *port, u64 dguid, u16 dlid)
+{
+ struct xsmp_ctx *xsmp_ctx;
+ int found = 0;
+
+ XSMP_FUNCTION("%s dguid: 0x%llx, dlid: 0x%x\n", __func__, dguid, dlid);
+
+ /*
+ * Grab the xsmp mutex. This protects the xsmp list from 3 different
+ * threads.
+ * 1. The port workq through which xsmp session add/delete happens
+ * 2. A rmmod thread (when user issues rmmod) (module unload)
+ * 3. A ULP attaches to XSMP layer (session update list) or deattaches
+ * This happens when xsvnic/xsvhba/uadm is loaded/unloaded
+ */
+ mutex_lock(&xsmp_mutex);
+
+ list_for_each_entry(xsmp_ctx, &port->xsmp_list, list) {
+ if (xsmp_ctx->dguid == dguid && port == xsmp_ctx->port) {
+ /*
+ * We saw the IO director from the same port
+ * (dguid + port)
+ * Now check if we have a LID change
+ */
+ if (dlid != xsmp_ctx->dlid) {
+ XSMP_PRINT
+ ("IO Director %s (GUID: 0x%llx) LID changed ",
+ xsmp_ctx->chassis_name, xsmp_ctx->dguid);
+ XSMP_PRINT("from 0x%x to 0x%x on port: 0x%llx\n",
+ xsmp_ctx->dlid, dlid, port->guid);
+ /*
+ * The connection will get torn down and
+ * reconnect back because of hello timeout
+ */
+ xsmp_ctx->dlid = dlid;
+ xsmp_ctx->conn_ctx.dlid = dlid;
+ }
+ found++;
+ break;
+ }
+ }
+ /*
+ * Did not find an entry, now start an XSMP session
+ * Need to be called in non-irq context
+ */
+ if (!found)
+ xsmp_session_create(port, dguid, dlid);
+
+ mutex_unlock(&xsmp_mutex);
+}
+
+void xsmp_cleanup_stale_xsmp_sessions(struct xscore_port *port, int force)
+{
+ struct xsmp_ctx *xsmp_ctx, *tmp;
+
+ XSMP_FUNCTION("%s:\n", __func__);
+
+ /*
+ * Protect list from rmmod thread/port wq and ULP register/unregister
+ */
+ mutex_lock(&xsmp_mutex);
+
+ list_for_each_entry_safe(xsmp_ctx, tmp, &port->xsmp_list, list) {
+ if (force || test_bit(XSMP_DELETE_BIT, &xsmp_ctx->flags)) {
+ XSMP_PRINT("Deleted XSMP session %s : %s (0x%llx)\n",
+ xsmp_ctx->session_name,
+ xsmp_ctx->chassis_name, xsmp_ctx->dguid);
+ /*
+ * If we are in force mode, notify ULP's that either
+ * 1. module is going away
+ * 2. or underlying hardware driver is going away
+ */
+ if (force)
+ notify_ulp(xsmp_ctx, XSCORE_DEVICE_REMOVAL);
+ xsmp_cleanup_session(xsmp_ctx);
+ }
+ }
+
+ mutex_unlock(&xsmp_mutex);
+}
+
+/*
+ * Walks the global XSMP session list under xsmp_mutex; must not be
+ * called with a spin lock held since it can sleep.
+ */
+static int xsmp_send_resource_list_update(void)
+{
+ struct xsmp_ctx *xsmp_ctx;
+
+ mutex_lock(&xsmp_mutex);
+ list_for_each_entry(xsmp_ctx, &gxsmp_list, glist) {
+ xsmp_ctx->counters[XSMP_RES_LIST_COUNTER]++;
+ xsmp_send_resource_list(xsmp_ctx, xcpm_resource_flags);
+ }
+ mutex_unlock(&xsmp_mutex);
+ return 0;
+}
+
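+/*
+ * Register a ULP control-message service (vnic/vhba/uspace/...) and
+ * advertise the updated resource list to all connected sessions.
+ */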
+int xcpm_register_service(struct xsmp_service_reg_info *s_info)
+{
+ struct xsmp_service_reg_info *sp;
+ int i = s_info->ctrl_message_type;
+
+ if (i < 1 || i >= MAX_NUM_SVCS)
+ return -EINVAL;
+
+ sp = &xcpm_services[i];
+ /*
+ * Check for duplicate entries
+ */
+ mutex_lock(&svc_mutex);
+ if (sp->svc_state == SVC_STATE_UP) {
+ mutex_unlock(&svc_mutex);
+ return i;
+ }
+ sp->ctrl_message_type = s_info->ctrl_message_type;
+ sp->resource_flag_index = s_info->resource_flag_index;
+ sp->receive_handler = s_info->receive_handler;
+ sp->event_handler = s_info->event_handler;
+ sp->callout_handler = s_info->callout_handler;
+ sp->svc_state = SVC_STATE_UP;
+ /*
+ * Kick-start sending the resource list to the remote end
+ */
+ xcpm_resource_flags |= (1 << sp->resource_flag_index);
+ xsmp_send_resource_list_update();
+ mutex_unlock(&svc_mutex);
+ return i;
+}
+EXPORT_SYMBOL(xcpm_register_service);
+
+static int xcpm_send_msg_client(struct xsmp_ctx *xsmp_ctx, int svc_id,
+ void *msg, int len)
+{
+ int ret = -ENOTCONN;
+ struct xsmp_service_reg_info *sp = &xcpm_services[svc_id];
+
+ mutex_lock(&svc_mutex);
+ if (sp->svc_state == SVC_STATE_UP && sp->receive_handler) {
+ atomic_inc(&sp->ref_cnt);
+ mutex_unlock(&svc_mutex);
+ sp->receive_handler((void *) (unsigned long)
+ xsmp_ctx->idr, msg, len);
+ ret = 0;
+ atomic_dec(&sp->ref_cnt);
+ } else
+ mutex_unlock(&svc_mutex);
+ return ret;
+}
+
+int xcpm_send_msg_xsigod(void *xsmp_hndl, void *msg, int len)
+{
+ struct xsmp_ctx *ctx;
+ int ret;
+
+ ctx = xsmp_get_ctx(xsmp_hndl);
+ if (!ctx)
+ return -EINVAL;
+
+ if (xcpm_resource_flags & (1 << RESOURCE_FLAG_INDEX_USPACE))
+ ret =
+ xcpm_send_msg_client(ctx, XSMP_MESSAGE_TYPE_USPACE, msg,
+ len);
+ else {
+ xscore_uadm_receive(xsmp_hndl, msg, len);
+ ret = 0;
+ }
+
+ xsmp_put_ctx(ctx);
+ return ret;
+}
+EXPORT_SYMBOL(xcpm_send_msg_xsigod);
+
+int xcpm_unregister_service(int service_id)
+{
+ struct xsmp_service_reg_info *sp;
+
+ if (service_id < 1 || service_id >= MAX_NUM_SVCS)
+ return -EINVAL;
+
+ sp = &xcpm_services[service_id];
+
+ mutex_lock(&svc_mutex);
+ if (sp->svc_state == SVC_STATE_UP) {
+ sp->svc_state = SVC_STATE_DOWN;
+ mutex_unlock(&svc_mutex);
+ while (atomic_read(&sp->ref_cnt))
+ msleep(20);
+ xcpm_resource_flags &= ~(1 << sp->resource_flag_index);
+ /*
+ * Send updated list
+ */
+ xsmp_send_resource_list_update();
+ } else
+ mutex_unlock(&svc_mutex);
+ return 0;
+}
+EXPORT_SYMBOL(xcpm_unregister_service);
+
+void *xcpm_alloc_msg(int sz)
+{
+ return kmalloc(sz, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(xcpm_alloc_msg);
+
+void xcpm_free_msg(void *msg)
+{
+ kfree(msg);
+}
+EXPORT_SYMBOL(xcpm_free_msg);
+
+int xcpm_is_xsigod_enabled(void)
+{
+ return xsigod_enable;
+}
+EXPORT_SYMBOL(xcpm_is_xsigod_enabled);
+
+static inline void change_header_byte_order(struct xsmp_message_header
+ *m_header)
+{
+ m_header->length = cpu_to_be16(m_header->length);
+ m_header->seq_number = cpu_to_be32(m_header->seq_number);
+ m_header->source_id.node_id_primary =
+ cpu_to_be64(m_header->source_id.node_id_primary);
+ m_header->dest_id.node_id_primary =
+ cpu_to_be64(m_header->dest_id.node_id_primary);
+}
+
+static inline void change_session_byte_order(struct xsmp_session_msg *m_session)
+{
+ m_session->length = cpu_to_be16(m_session->length);
+ m_session->resource_flags = cpu_to_be32(m_session->resource_flags);
+ m_session->version = cpu_to_be32(m_session->version);
+ m_session->chassis_version = cpu_to_be32(m_session->chassis_version);
+ m_session->boot_flags = cpu_to_be32(m_session->boot_flags);
+ m_session->fw_ver = cpu_to_be64(m_session->fw_ver);
+ m_session->hw_ver = cpu_to_be32(m_session->hw_ver);
+ m_session->vendor_part_id = cpu_to_be32(m_session->vendor_part_id);
+}
+
+int xcpm_get_xsmp_session_info(void *xsmp_hndl,
+ struct xsmp_session_info *ip)
+{
+ struct xsmp_ctx *ctx;
+
+ ctx = xsmp_get_ctx(xsmp_hndl);
+ if (!ctx)
+ return -EINVAL;
+
+ strncpy(ip->chassis_name, ctx->chassis_name,
+ sizeof(ip->chassis_name) - 1);
+ ip->chassis_name[sizeof(ip->chassis_name) - 1] = 0;
+ strncpy(ip->session_name, ctx->session_name,
+ sizeof(ip->session_name) - 1);
+ ip->session_name[sizeof(ip->session_name) - 1] = 0;
+ ip->version = ctx->xsigo_xsmp_version;
+ ip->port = ctx->port;
+ ip->ib_device = ctx->port->xs_dev->device;
+ ip->dma_device = ctx->port->xs_dev->device->dma_device;
+ ip->pd = ctx->port->xs_dev->pd;
+ ip->mr = ctx->port->xs_dev->mr;
+ ip->is_shca = ctx->port->xs_dev->is_shca;
+ ip->dguid = ctx->dguid;
+ xsmp_put_ctx(ctx);
+ return 0;
+}
+EXPORT_SYMBOL(xcpm_get_xsmp_session_info);
+
+int xcpm_check_duplicate_names(void *xsmp_hndl, char *name, u8 svc_id)
+{
+ int ret = 0;
+ struct xsmp_service_reg_info *sp = &xcpm_services[svc_id];
+ struct net_device *chk_netdev;
+
+ if (strcmp(name, VMWARE_RESERVED_KEYS) == 0) {
+ pr_err("%s %s is not supported vnic name ", __func__, name);
+ pr_err("(it is a reserved keyword for esx5.0)\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ chk_netdev = dev_get_by_name(&init_net, name);
+ if (chk_netdev != NULL) {
+ ret = -EINVAL;
+ pr_info("%s !!Warning!! NIC %s is already", __func__, name);
+ pr_info("present in system\n");
+ dev_put(chk_netdev);
+ goto out;
+ }
+
+ mutex_lock(&svc_mutex);
+ if (sp->svc_state == SVC_STATE_UP && sp->callout_handler) {
+ atomic_inc(&sp->ref_cnt);
+ mutex_unlock(&svc_mutex);
+ ret = sp->callout_handler(name);
+ atomic_dec(&sp->ref_cnt);
+ } else
+ mutex_unlock(&svc_mutex);
+out:
+ return ret;
+}
+EXPORT_SYMBOL(xcpm_check_duplicate_names);
+
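+/*
+ * Send a ULP XSMP message over the session connection, stamping the
+ * sequence number and the source/destination GUIDs into the header.
+ */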
+int xcpm_send_message(void *hndl, int svc_id, u8 *msg, int len)
+{
+ unsigned long flags;
+ struct xsmp_ctx *ctx;
+ int ret;
+ struct xsmp_message_header *m_header;
+
+ m_header = (struct xsmp_message_header *)msg;
+
+ ctx = xsmp_get_ctx(hndl);
+ if (!ctx)
+ return -EINVAL;
+ /*
+ * Now check state of XSMP
+ */
+ spin_lock_irqsave(&ctx->lock, flags);
+ if (ctx->state != XSMP_SESSION_CONNECTED) {
+ ctx->counters[XSMP_SESSION_CONN_DOWN_COUNTER]++;
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ xsmp_put_ctx(ctx);
+ return -ENOTCONN;
+ }
+ /*
+ * Fix sequence number and GUID
+ */
+ m_header->seq_number = cpu_to_be32(ctx->seq_number++);
+ m_header->source_id.node_id_primary = cpu_to_be64(ctx->port->guid);
+ m_header->source_id.node_id_aux = 0;
+ m_header->dest_id.node_id_aux = 0;
+ m_header->dest_id.node_id_primary = cpu_to_be64(ctx->dguid);
+ ret =
+ xscore_post_send(&ctx->conn_ctx, m_header, len,
+ XSCORE_DEFER_PROCESS);
+ ctx->counters[XSMP_TOTAL_MSG_SENT_COUNTER]++;
+ switch (svc_id) {
+ case XSMP_MESSAGE_TYPE_VNIC:
+ ctx->counters[XSMP_VNIC_MESSAGE_SENT_COUNTER]++;
+ break;
+ case XSMP_MESSAGE_TYPE_VHBA:
+ ctx->counters[XSMP_VHBA_MESSAGE_SENT_COUNTER]++;
+ break;
+ case XSMP_MESSAGE_TYPE_USPACE:
+ ctx->counters[XSMP_USPACE_MESSAGE_SENT_COUNTER]++;
+ break;
+ case XSMP_MESSAGE_TYPE_XVE:
+ ctx->counters[XSMP_XVE_MESSAGE_SENT_COUNTER]++;
+ break;
+ default:
+ break;
+ }
+ if (ret) {
+ if (ret == -ENOBUFS)
+ ctx->counters[XSMP_SESSION_RING_FULL_COUNTER]++;
+ else
+ ctx->counters[XSMP_SESSION_SEND_ERROR_COUNTER]++;
+ }
+ spin_unlock_irqrestore(&ctx->lock, flags);
+ xsmp_put_ctx(ctx);
+ return ret;
+}
+EXPORT_SYMBOL(xcpm_send_message);
+
+/*
+ * XSMP session will be considered to "match" (i.e. are the
+ * same logical communication path) if the remote (destination) GUID
+ * and the session (aka server profile name) are identical.
+ * GUIDs by definition should be unique and there is a requirement
+ * that each server profile name on a given chassis be unique.
+ */
+int xsmp_sessions_match(struct xsmp_session_info *infop, void *cookie)
+{
+ struct xsmp_ctx *ctx;
+ int rc;
+
+ ctx = xsmp_get_ctx(cookie);
+ if (!ctx)
+ return 0;
+ rc = ((infop->dguid == ctx->dguid)
+ && (strncmp(infop->session_name, ctx->session_name,
+ SESSION_NAME_LEN) == 0));
+ xsmp_put_ctx(ctx);
+ return rc;
+}
+EXPORT_SYMBOL(xsmp_sessions_match);
+
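+/*
+ * Poll every registered port until it reports IB_PORT_ACTIVE or the
+ * 90 second timeout expires.
+ */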
+void xscore_wait_for_link_up(void)
+{
+ struct xscore_port *port;
+ int time, delayms = 1000;
+ int timeoutsecs = 90;
+ struct ib_port_attr port_attr;
+ int all_up;
+
+ for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+ all_up = 1;
+ mutex_lock(&xscore_port_mutex);
+ list_for_each_entry(port, &xscore_port_list, gport_list) {
+ (void)ib_query_port(port->xs_dev->device,
+ port->port_num, &port_attr);
+ if (port_attr.state != IB_PORT_ACTIVE) {
+ all_up = 0;
+ continue;
+ }
+ }
+ mutex_unlock(&xscore_port_mutex);
+ if (all_up)
+ break;
+ msleep(delayms);
+ }
+}
+
+void xscore_wait_for_xds_resp(void)
+{
+ struct xscore_port *port;
+ int time, delayms = 1000;
+ int timeoutsecs = 30;
+ struct ib_port_attr port_attr;
+ int all_ok;
+
+ for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+ all_ok = 1;
+ mutex_lock(&xscore_port_mutex);
+ list_for_each_entry(port, &xscore_port_list, gport_list) {
+ (void)ib_query_port(port->xs_dev->device,
+ port->port_num, &port_attr);
+ if (port_attr.state != IB_PORT_ACTIVE)
+ continue;
+ /*
+ * Check if XDS bit is set
+ */
+ if (!test_bit(XSCORE_SP_PRESENT, &port->flags)
+ && !test_bit(XSCORE_SP_NOT_PRESENT, &port->flags))
+ all_ok = 0;
+ }
+ mutex_unlock(&xscore_port_mutex);
+ if (all_ok)
+ break;
+ msleep(delayms);
+ }
+}
+
+/*
+ * This is used by the xsigoboot driver to verify that all XSMP sessions
+ * are up
+ */
+int xsmp_sessions_up(void)
+{
+ struct xsmp_ctx *xsmp_ctx;
+ int n = 0;
+
+ mutex_lock(&xsmp_mutex);
+ if (list_empty(&gxsmp_list)) {
+ /*
+ * If XSMP list is empty mark all sessions up
+ */
+ n = 1;
+ goto out;
+ }
+ list_for_each_entry(xsmp_ctx, &gxsmp_list, glist) {
+ if (xsmp_ctx->state != XSMP_SESSION_CONNECTED) {
+ n = 0;
+ break;
+ }
+ n++;
+ }
+out:
+ mutex_unlock(&xsmp_mutex);
+ return n > 0;
+}
+
+/*
+ * wait for the XSMP sessions to come up.
+ */
+int xscore_wait_for_sessions(u8 cal_time)
+{
+ unsigned long init_time;
+ int time, ret = 0, delayms = 1000;
+ int timeoutsecs = xscore_sess_wait_time;
+
+ init_time = jiffies;
+
+ if (!xscore_wait_in_boot)
+ goto out;
+
+ if (cal_time)
+ pr_info("XSCORE: Waiting for XSMP Session to come up .....\n");
+ else {
+ mutex_lock(&xsmp_mutex);
+ if (list_empty(&gxsmp_list))
+ ret = 0;
+ else
+ ret = 1;
+ mutex_unlock(&xsmp_mutex);
+ return ret;
+ }
+
+ xscore_wait_for_link_up();
+
+ xscore_wait_for_xds_resp();
+
+ for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+ if (xsmp_sessions_up()) {
+ XSMP_INFO("XSMP Sessions are up\n");
+ ret = delayms;
+ goto out;
+ }
+ msleep(delayms);
+ XSMP_INFO("Waiting for XSMP Session to be up\n");
+ }
+ XSMP_INFO("XSMP Sessions are not up\n");
+
+out:
+ if (cal_time)
+ xscore_wait_time = jiffies - init_time;
+ return ret;
+}
+EXPORT_SYMBOL(xscore_wait_for_sessions);
+
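+/*
+ * Build and post one session-class XSMP message (header followed by a
+ * single session object) to the chassis.  The buffer is freed by the send
+ * completion handler on success, or here when the state check or post
+ * fails.
+ */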
+static int send_xsmp_sess_msg(struct xsmp_ctx *ctxp, u8 type, u32 rflags)
+{
+ struct xsmp_session_msg *m_session;
+ struct xsmp_message_header *m_header;
+ unsigned long flags;
+ int ret = 0;
+ int len;
+
+ m_header = kmalloc(MAX_XSMP_MSG_SIZE, GFP_ATOMIC);
+ if (!m_header)
+ return -ENOMEM;
+ spin_lock_irqsave(&ctxp->lock, flags);
+ if (ctxp->state < XSMP_SESSION_TPT_CONNECTED
+ || ctxp->state > XSMP_SESSION_CONNECTED) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ m_session = (struct xsmp_session_msg *)(m_header + 1);
+
+ m_header->type = XSMP_MESSAGE_TYPE_SESSION;
+ len = m_header->length = sizeof(*m_header) + sizeof(*m_session);
+
+ m_header->source_id.node_id_primary = ctxp->port->guid;
+ m_header->source_id.node_id_aux = 0;
+ m_header->dest_id.node_id_primary = ctxp->dguid;
+ m_header->dest_id.node_id_aux = 0;
+ m_header->seq_number = ctxp->seq_number++;
+
+ m_session->type = type;
+ m_session->length = sizeof(*m_session);
+ m_session->resource_flags = rflags | RESOURCE_OS_TYPE_LINUX;
+ m_session->version = XSIGO_LINUX_DRIVER_VERSION;
+ m_session->chassis_version = MINIMUM_XSIGOS_VERSION;
+ m_session->boot_flags = boot_flag;
+ m_session->fw_ver = ctxp->port->xs_dev->fw_ver;
+ m_session->hw_ver = ctxp->port->xs_dev->hw_ver;
+ m_session->vendor_part_id = ctxp->port->xs_dev->vendor_part_id;
+
+ change_header_byte_order(m_header);
+ change_session_byte_order(m_session);
+ ret =
+ xscore_post_send(&ctxp->conn_ctx, m_header, len,
+ XSCORE_DEFER_PROCESS);
+ ctxp->counters[XSMP_TOTAL_MSG_SENT_COUNTER]++;
+ ctxp->counters[XSMP_SESSION_MESSAGE_SENT_COUNTER]++;
+ if (ret) {
+ if (ret == -ENOBUFS)
+ ctxp->counters[XSMP_SESSION_RING_FULL_COUNTER]++;
+ else
+ ctxp->counters[XSMP_SESSION_SEND_ERROR_COUNTER]++;
+ }
+out:
+ spin_unlock_irqrestore(&ctxp->lock, flags);
+ if (ret)
+ kfree(m_header);
+ return ret;
+}
+
+static int xsmp_send_register_msg(struct xsmp_ctx *ctx, u32 rflags)
+{
+ return send_xsmp_sess_msg(ctx, XSMP_SESSION_REGISTER, rflags);
+}
+
+static int xsmp_send_hello_msg(struct xsmp_ctx *ctx)
+{
+ return send_xsmp_sess_msg(ctx, XSMP_SESSION_HELLO, 0);
+}
+
+int xsmp_send_resource_list(struct xsmp_ctx *ctx, u32 rflags)
+{
+ return send_xsmp_sess_msg(ctx, XSMP_SESSION_RESOURCE_LIST, rflags);
+}
+
+int xsmp_send_shutdown(struct xsmp_ctx *ctx)
+{
+ return send_xsmp_sess_msg(ctx, XSMP_SESSION_SHUTDOWN, 0);
+}
+
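+/*
+ * Handle the chassis REG_CONFIRM reply: the 'version' and 'resource_flags'
+ * fields of the session object carry the hello interval (in seconds) and
+ * the datapath timeout.  The session is marked connected and its hello
+ * timeout is set to three times the advertised interval.
+ */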
+static void handle_reg_confirm_msg(struct xsmp_ctx *ctx,
+ struct xsmp_session_msg *m_session)
+{
+ int hello_interval = m_session->version;
+ int datapath_timeout = m_session->resource_flags;
+
+ XSMP_INFO("Rcvd XSMP_SESSION_REG_CONFIRM from 0x%llx\n", ctx->dguid);
+ set_bit(XSMP_REG_CONFIRM_RCVD, &ctx->flags);
+ ctx->counters[XSMP_REG_CONF_COUNTER]++;
+ ctx->state = XSMP_SESSION_CONNECTED;
+ ctx->hello_timeout = msecs_to_jiffies(hello_interval * 3 * 1000);
+
+ if (datapath_timeout != -1)
+ ctx->datapath_timeout = (hello_interval * 3) * 2;
+ else
+ ctx->datapath_timeout = -1;
+
+ ctx->xsigo_xsmp_version = ntohl(m_session->xsigo_xsmp_version);
+ memcpy(ctx->chassis_name, m_session->chassis_name, CHASSIS_NAME_LEN);
+ ctx->chassis_name[CHASSIS_NAME_LEN - 1] = '\0';
+ memcpy(ctx->session_name, m_session->session_name, SESSION_NAME_LEN);
+ ctx->session_name[SESSION_NAME_LEN - 1] = '\0';
+ XSMP_PRINT("Established XSMP session (%s) to chassis (%s)\n",
+ ctx->session_name, ctx->chassis_name);
+}
+
+static int is_seq_number_ok(struct xsmp_ctx *ctx,
+ struct xsmp_message_header *hdr)
+{
+ int ok = 1;
+
+ if (ctx->rcv_seq_number != be32_to_cpu(hdr->seq_number)) {
+ XSMP_INFO("XSMP Session 0x%llx", ctx->dguid);
+ XSMP_INFO("Seq number mismatch: exp: 0x%x, actual: 0x%x\n",
+ ctx->rcv_seq_number, be32_to_cpu(hdr->seq_number));
+ ctx->counters[XSMP_SEQ_MISMATCH_COUNTER]++;
+ ok = 0;
+ }
+ ctx->rcv_seq_number++;
+ return ok;
+}
+
+static void handle_hello_msg(struct xsmp_ctx *ctx,
+ struct xsmp_message_header *hdr)
+{
+ XSMP_INFO("Rcvd XSMP_SESSION_HELLO from 0x%llx\n", ctx->dguid);
+ ctx->hello_jiffies = jiffies;
+ if (xsmp_send_hello_msg(ctx)) {
+ /*
+ * Mark connection as bad and reconnect
+ */
+ } else {
+ ctx->counters[XSMP_HELLO_SENT_COUNTER]++;
+ }
+}
+
+static int xsmp_process_xsmp_session_type(struct xsmp_ctx *ctx, void *msg,
+ int length)
+{
+ struct xsmp_message_header *m_header = msg;
+ struct xsmp_session_msg *m_session =
+ (struct xsmp_session_msg *)(m_header + 1);
+
+ XSMP_FUNCTION("%s: Processing message from GUID: %llx\n",
+ __func__, ctx->dguid);
+
+ if (length < sizeof(*m_header)) {
+ kfree(msg);
+ return -EINVAL;
+ }
+ change_header_byte_order(m_header);
+ if (length > m_header->length) {
+ kfree(msg);
+ return -EINVAL;
+ }
+ change_session_byte_order(m_session);
+
+ switch (m_session->type) {
+ case XSMP_SESSION_REG_CONFIRM:
+ handle_reg_confirm_msg(ctx, m_session);
+ set_bit(XSMP_REG_CONFIRM_RCVD, &ctx->flags);
+ break;
+ case XSMP_SESSION_HELLO:
+ ctx->counters[XSMP_HELLO_RCVD_COUNTER]++;
+ handle_hello_msg(ctx, m_header);
+ break;
+ case XSMP_SESSION_REG_REJECT:
+ ctx->counters[XSMP_REJ_RCVD_COUNTER]++;
+ set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+ XSMP_PRINT("XSMP REJECT received session %s : %s (0x%llx)\n",
+ ctx->session_name, ctx->chassis_name, ctx->dguid);
+ break;
+ case XSMP_SESSION_SHUTDOWN:
+ ctx->counters[XSMP_SHUTDOWN_RCVD_COUNTER]++;
+ XSMP_PRINT("XSMP shutdown received session %s : %s (0x%llx)\n",
+ ctx->session_name, ctx->chassis_name, ctx->dguid);
+ set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+ break;
+ default:
+ break;
+ }
+ kfree(msg);
+ return 0;
+}
+
+static void xsmp_cleanup_session(struct xsmp_ctx *xsmp_ctx)
+{
+ unsigned long flags, flags1;
+ /*
+ * Now delete the entry from the list & idr
+ */
+ XSMP_FUNCTION("%s: Cleaning up 0x%llx\n", __func__, xsmp_ctx->dguid);
+ xcpm_xsmp_remove_proc_entry(xsmp_ctx);
+ spin_lock_irqsave(&xsmp_glob_lock, flags);
+ idr_remove(&xsmp_id_table, xsmp_ctx->idr);
+ xsmp_ctx->idr = -1;
+ spin_lock_irqsave(&xsmp_ctx->lock, flags1);
+ set_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags);
+ spin_unlock_irqrestore(&xsmp_ctx->lock, flags1);
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ /*
+ * Now disconnect and cleanup connection
+ */
+ (void)xsmp_sess_disconnect(xsmp_ctx);
+
+ if (cancel_delayed_work(&xsmp_ctx->sm_work))
+ xsmp_put_ctx(xsmp_ctx);
+ /*
+ * Wait for the reference count to go to zero
+ */
+ while (atomic_read(&xsmp_ctx->ref_cnt))
+ msleep(100);
+
+ xscore_conn_destroy(&xsmp_ctx->conn_ctx);
+ spin_lock_irqsave(&xsmp_glob_lock, flags);
+ list_del(&xsmp_ctx->list);
+ list_del(&xsmp_ctx->glist);
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ kfree(xsmp_ctx);
+}
+
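+/*
+ * Returns 0 for session HELLO messages (so that, when xscore_handle_hello_msg
+ * is set, they can be handled inline in the receive path) and 1 for
+ * everything else.
+ */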
+static int xsmp_check_msg_type(struct xsmp_ctx *xsmp_ctx, void *msg)
+{
+ struct xsmp_session_msg *m_session = { 0 };
+ struct xsmp_message_header *m_header =
+ (struct xsmp_message_header *)msg;
+ int ret = 1;
+
+ switch (m_header->type) {
+ case XSMP_MESSAGE_TYPE_SESSION:
+ m_session = (struct xsmp_session_msg *)(m_header + 1);
+ if (m_session->type == XSMP_SESSION_HELLO)
+ ret = 0;
+ break;
+ default:
+ break;
+
+ }
+ return ret;
+}
+
+/*
+ * Executes in workq/thread context
+ * Potentially can use idr here XXX
+ */
+static void xsmp_process_recv_msgs(struct work_struct *work)
+{
+ struct xsmp_work *xwork = container_of(work, struct xsmp_work,
+ work);
+ struct xsmp_message_header *m_header = xwork->msg;
+ struct xsmp_ctx *xsmp_ctx = xwork->xsmp_ctx;
+ int sendup = 0;
+
+ xscore_set_wq_state(XSCORE_WQ_XSMP_PROC_MSG);
+ is_seq_number_ok(xsmp_ctx, m_header);
+
+ switch (m_header->type) {
+ case XSMP_MESSAGE_TYPE_VNIC:
+ xsmp_ctx->counters[XSMP_VNIC_MESSAGE_COUNTER]++;
+ sendup++;
+ break;
+ case XSMP_MESSAGE_TYPE_VHBA:
+ xsmp_ctx->counters[XSMP_VHBA_MESSAGE_COUNTER]++;
+ sendup++;
+ break;
+ case XSMP_MESSAGE_TYPE_USPACE:
+ xsmp_ctx->counters[XSMP_USPACE_MESSAGE_COUNTER]++;
+ sendup++;
+ break;
+ case XSMP_MESSAGE_TYPE_XVE:
+ xsmp_ctx->counters[XSMP_XVE_MESSAGE_COUNTER]++;
+ sendup++;
+ break;
+ case XSMP_MESSAGE_TYPE_SESSION:
+ xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]++;
+ xsmp_process_xsmp_session_type(xwork->xsmp_ctx, xwork->msg,
+ xwork->len);
+ break;
+ default:
+ kfree(xwork->msg);
+ XSMP_ERROR("%s: Unknown message type: %d\n", __func__,
+ m_header->type);
+ break;
+ }
+ if (sendup) {
+ if (xcpm_send_msg_client
+ (xsmp_ctx, m_header->type, xwork->msg, xwork->len))
+ kfree(xwork->msg);
+ }
+ kfree(xwork);
+ xsmp_put_ctx(xsmp_ctx);
+ xscore_clear_wq_state(XSCORE_WQ_XSMP_PROC_MSG);
+}
+
+static void queue_sm_work(struct xsmp_ctx *xsmp_ctx, int msecs)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&xsmp_ctx->lock, flags);
+ if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags)) {
+ atomic_inc(&xsmp_ctx->ref_cnt);
+ queue_delayed_work(xsmp_ctx->wq, &xsmp_ctx->sm_work,
+ msecs_to_jiffies(msecs));
+ } else
+ set_bit(XSMP_DELETE_BIT, &xsmp_ctx->flags);
+ spin_unlock_irqrestore(&xsmp_ctx->lock, flags);
+}
+
+static int xsmp_sess_disconnect(struct xsmp_ctx *xsmp_ctx)
+{
+ xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+ (void)xscore_conn_disconnect(&xsmp_ctx->conn_ctx, 0);
+ return 0;
+}
+
+static int xsmp_sess_connect(struct xsmp_ctx *xsmp_ctx)
+{
+ int ret = 0;
+
+ switch (xsmp_ctx->state) {
+ case XSMP_SESSION_ERROR:
+ case XSMP_SESSION_INIT:
+ case XSMP_SESSION_DISCONNECTED:
+ xsmp_ctx->counters[XSMP_CONN_RETRY_COUNTER]++;
+ xsmp_ctx->rcv_seq_number = 1;
+ xsmp_ctx->seq_number = 1;
+ xsmp_ctx->jiffies = jiffies;
+ xsmp_ctx->state = XSMP_SESSION_TPT_CONNECTING;
+ clear_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+ clear_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags);
+ XSMP_INFO("%s: Session to 0x%llx, Trying\n", __func__,
+ xsmp_ctx->dguid);
+ ret = xscore_conn_connect(&xsmp_ctx->conn_ctx,
+ XSCORE_SYNCHRONOUS);
+ if (ret) {
+ xsmp_ctx->counters[XSMP_CONN_FAILED_COUNTER]++;
+ XSMP_INFO("%s: Session %s:%s to 0x%llx Failed ret %d\n",
+ __func__, xsmp_ctx->session_name,
+ xsmp_ctx->chassis_name, xsmp_ctx->dguid, ret);
+ ret = -ENOTCONN;
+ } else {
+ XSMP_INFO("%s: Session to 0x%llx successful\n",
+ __func__, xsmp_ctx->dguid);
+ xsmp_ctx->counters[XSMP_CONN_SUCCESS_COUNTER]++;
+ xsmp_ctx->jiffies = jiffies;
+ xsmp_ctx->hello_jiffies = jiffies;
+ xsmp_ctx->state = XSMP_SESSION_CONNECTING;
+ if (xsmp_send_register_msg
+ (xsmp_ctx, xcpm_resource_flags)) {
+ XSMP_ERROR("REGISTER_MESSAGE failed");
+ XSMP_ERROR("to GUID:0x%llx\n", xsmp_ctx->dguid);
+ } else {
+ set_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+ xsmp_ctx->counters[XSMP_REG_SENT_COUNTER]++;
+ }
+ }
+ break;
+ default:
+ XSMP_ERROR("%s:Connect called in wrong state, %d\n",
+ __func__, xsmp_ctx->state);
+ break;
+ }
+ return ret;
+}
+
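+/*
+ * Periodic session state machine.  While connected (or connecting) it only
+ * checks that a HELLO was seen within hello_timeout (the check is skipped
+ * while boot_flag is set); on timeout, or in any other state, it tears the
+ * transport down and reconnects unless the IB link is marked down.
+ */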
+static void xsmp_state_machine(struct xsmp_ctx *xsmp_ctx)
+{
+ if (xsmp_ctx->state == XSMP_SESSION_CONNECTED ||
+ xsmp_ctx->state == XSMP_SESSION_CONNECTING) {
+ xsmp_ctx->sm_delay = 10000;
+ /*
+ * Check hello time stamp
+ */
+ if (!boot_flag
+ && (((long)jiffies - (long)xsmp_ctx->hello_jiffies) >
+ (long)xsmp_ctx->hello_timeout)) {
+ /*
+ * Reconnect
+ */
+ XSMP_PRINT("XSMROR: trailing whitespacesis");
+ XSMP_PRINT("(%s) expired..Reconnecting %s\n",
+ xsmp_ctx->session_name,
+ xsmp_ctx->chassis_name);
+
+ xsmp_ctx->counters[XSMP_SESSION_TIMEOUT_COUNTER]++;
+ } else
+ return;
+ }
+ xsmp_ctx->sm_delay = 2000;
+ (void)xsmp_sess_disconnect(xsmp_ctx);
+ if (!test_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags))
+ xsmp_sess_connect(xsmp_ctx);
+}
+
+static void xsmp_state_machine_work(struct work_struct *work)
+{
+ struct xsmp_ctx *xsmp_ctx = container_of(work, struct xsmp_ctx,
+ sm_work.work);
+ xscore_set_wq_state(XSCORE_DWQ_SM_WORK);
+ if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags))
+ xsmp_state_machine(xsmp_ctx);
+ queue_sm_work(xsmp_ctx, xsmp_ctx->sm_delay);
+ xsmp_put_ctx(xsmp_ctx);
+ xscore_clear_wq_state(XSCORE_DWQ_SM_WORK);
+}
+
+/*
+ * Called from interrupt context
+ */
+void xsmp_send_handler(void *client_arg, void *msg, int status, int n)
+{
+ struct xsmp_ctx *xsmp_ctx = client_arg;
+
+ XSMP_INFO("%s: Status %d, GUID: 0x%llx\n", __func__, status,
+ xsmp_ctx->dguid);
+ if (status) {
+ XSMP_ERROR
+ ("XSMP: %s:%s Send Completion error: 0x%llx, status %d\n",
+ xsmp_ctx->session_name, xsmp_ctx->chassis_name,
+ xsmp_ctx->dguid, status);
+ xsmp_ctx->state = XSMP_SESSION_ERROR;
+ }
+ kfree(msg);
+}
+
+/*
+ * Called from interrupt context
+ */
+void xsmp_recv_handler(void *client_arg, void *msg, int sz, int status, int n)
+{
+ struct xsmp_ctx *xsmp_ctx = client_arg;
+ struct xsmp_work *work;
+ unsigned long flags;
+
+ if (status) {
+ /*
+ * XXX mark the connection as bad and let it
+ * reconnect (the hello timer will kick in)
+ */
+ XSMP_ERROR
+ ("XSMP: %s:%s Recv Completion error: 0x%llx, status %d\n",
+ xsmp_ctx->session_name, xsmp_ctx->chassis_name,
+ xsmp_ctx->dguid, status);
+ xsmp_ctx->state = XSMP_SESSION_ERROR;
+ kfree(msg);
+ return;
+ }
+ if (xscore_handle_hello_msg && !xsmp_check_msg_type(xsmp_ctx, msg)) {
+ xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]++;
+ xsmp_ctx->counters[XSMP_HELLO_INTERRUPT_COUNTER]++;
+ xsmp_process_xsmp_session_type(xsmp_ctx, msg, sz);
+ return;
+ }
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ kfree(msg);
+ return;
+ }
+ INIT_WORK(&work->work, xsmp_process_recv_msgs);
+ work->xsmp_ctx = xsmp_ctx;
+ work->msg = msg;
+ work->len = sz;
+ work->status = status;
+
+ spin_lock_irqsave(&xsmp_ctx->lock, flags);
+ if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags)) {
+ atomic_inc(&xsmp_ctx->ref_cnt);
+ queue_work(xsmp_ctx->wq, &work->work);
+ } else {
+ kfree(msg);
+ kfree(work);
+ }
+ spin_unlock_irqrestore(&xsmp_ctx->lock, flags);
+}
+
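+/*
+ * Fan a session event out to every registered ULP service.  svc_mutex is
+ * dropped around each callback (the service is pinned by its ref_cnt
+ * instead), presumably so a handler may itself register or unregister
+ * services.
+ */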
+static void notify_ulp(struct xsmp_ctx *ctx, int evt)
+{
+ int i;
+ struct xsmp_service_reg_info *sp;
+
+ mutex_lock(&svc_mutex);
+ for (i = 1; i < MAX_NUM_SVCS; i++) {
+ sp = &xcpm_services[i];
+ if (sp->svc_state == SVC_STATE_UP && sp->event_handler) {
+ atomic_inc(&sp->ref_cnt);
+ mutex_unlock(&svc_mutex);
+ sp->event_handler((void *) (unsigned long)
+ ctx->idr, evt);
+ atomic_dec(&sp->ref_cnt);
+ mutex_lock(&svc_mutex);
+ }
+ }
+ mutex_unlock(&svc_mutex);
+}
+
+/*
+ * Called from CM thread context, if you want delayed
+ * processing, post to local thread
+ */
+void xsmp_event_handler(void *client_arg, int event)
+{
+ struct xsmp_ctx *xsmp_ctx = client_arg;
+
+ switch (event) {
+ case XSCORE_CONN_CONNECTED:
+ XSMP_INFO("XSCORE_CONN_CONNECTED: GUID: 0x%llx\n",
+ xsmp_ctx->dguid);
+ break;
+ case XSCORE_CONN_ERR:
+ xsmp_ctx->state = XSMP_SESSION_ERROR;
+ XSMP_INFO("XSCORE_CONN_ERR: GUID: 0x%llx\n", xsmp_ctx->dguid);
+ break;
+ case XSCORE_CONN_RDISCONNECTED:
+ xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+ XSMP_INFO("XSCORE_CONN_RDISCONNECTED: GUID: 0x%llx\n",
+ xsmp_ctx->dguid);
+ break;
+ case XSCORE_CONN_LDISCONNECTED:
+ xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+ XSMP_INFO("XSCORE_CONN_LDISCONNECTED: GUID: 0x%llx\n",
+ xsmp_ctx->dguid);
+ break;
+ default:
+ break;
+ }
+ notify_ulp(xsmp_ctx, event);
+}
+
+struct xsmp_private_data {
+ u8 is_checksum;
+ u32 reserved[6];
+} __packed;
+
+int xsmp_session_create(struct xscore_port *port, u64 dguid, u16 dlid)
+{
+ struct xsmp_ctx *xsmp_ctx;
+ unsigned long flags;
+ static int next_id = 1;
+ int ret;
+ struct xscore_conn_ctx *cctx;
+ struct xsmp_private_data *cmp;
+
+ XSMP_FUNCTION("%s: dguid: 0x%llx, dlid: 0x%x\n", __func__, dguid, dlid);
+
+ xsmp_ctx = kzalloc(sizeof(*xsmp_ctx), GFP_ATOMIC);
+ if (!xsmp_ctx)
+ return -ENOMEM;
+ spin_lock_init(&xsmp_ctx->lock);
+
+ cctx = &xsmp_ctx->conn_ctx;
+ memset(cctx, 0, sizeof(*cctx));
+ cctx->tx_ring_size = xsmp_ring_size;
+ cctx->rx_ring_size = xsmp_ring_size;
+ cctx->rx_buf_size = MAX_XSMP_MSG_SIZE;
+ cctx->client_arg = xsmp_ctx;
+ cctx->event_handler = xsmp_event_handler;
+ cctx->send_compl_handler = xsmp_send_handler;
+ cctx->recv_msg_handler = xsmp_recv_handler;
+ cctx->dguid = dguid;
+ cctx->dlid = dlid;
+ cctx->service_id = be64_to_cpu(XSMP_SERVICE_ID);
+
+ cmp = (struct xsmp_private_data *)cctx->priv_data;
+ cctx->priv_data_len = sizeof(*cmp);
+ if (port->xs_dev->is_shca && shca_csum) {
+ cmp->is_checksum = 1;
+ cctx->features |= XSCORE_USE_CHECKSUM;
+ } else {
+ cmp->is_checksum = 0;
+ cctx->features &= ~XSCORE_USE_CHECKSUM;
+ }
+
+ ret = xscore_conn_init(&xsmp_ctx->conn_ctx, port);
+ if (ret) {
+ XSMP_ERROR("xscore_conn_init error %d\n", ret);
+ kfree(xsmp_ctx);
+ return ret;
+ }
+ xsmp_ctx->state = XSMP_SESSION_INIT;
+ xsmp_ctx->dguid = dguid;
+ xsmp_ctx->dlid = dlid;
+ xsmp_ctx->port = port;
+ xsmp_ctx->wq = port->port_wq;
+ xsmp_ctx->hello_timeout = msecs_to_jiffies(60 * 1000);
+
+ spin_lock_irqsave(&xsmp_glob_lock, flags);
+ xsmp_ctx->idr = idr_alloc(&xsmp_id_table, xsmp_ctx, next_id++,
+ 0, GFP_NOWAIT);
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ if (xsmp_ctx->idr < 0) {
+ XSMP_FUNCTION("%s: dguid: 0x%llx, Failed to get retr%d idr%d \n",
+ __func__, dguid, xsmp_ctx->idr, next_id);
+ ret = -1;
+ return ret;
+ }
+
+ INIT_DELAYED_WORK(&xsmp_ctx->sm_work, xsmp_state_machine_work);
+ spin_lock_irqsave(&xsmp_glob_lock, flags);
+ list_add_tail(&xsmp_ctx->list, &port->xsmp_list);
+ list_add_tail(&xsmp_ctx->glist, &gxsmp_list);
+ spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+ xcpm_xsmp_add_proc_entry(xsmp_ctx);
+ xsmp_ctx->sm_delay = 1000;
+ queue_sm_work(xsmp_ctx, 0);
+ return 0;
+}
+
+void xsmp_module_init(void)
+{
+ spin_lock_init(&xsmp_glob_lock);
+ mutex_init(&svc_mutex);
+ mutex_init(&xsmp_mutex);
+ idr_init(&xsmp_id_table);
+ INIT_LIST_HEAD(&gxsmp_list);
+}
+
+void xsmp_module_destroy(void)
+{
+ idr_destroy(&xsmp_id_table);
+ mutex_destroy(&svc_mutex);
+ mutex_destroy(&xsmp_mutex);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSMP_H_
+#define _XSMP_H_
+
+enum {
+ XSMP_REG_SENT_COUNTER,
+ XSMP_REG_CONF_COUNTER,
+ XSMP_RES_LIST_COUNTER,
+ XSMP_HELLO_RCVD_COUNTER,
+ XSMP_HELLO_INTERRUPT_COUNTER,
+ XSMP_REJ_RCVD_COUNTER,
+ XSMP_HELLO_SENT_COUNTER,
+ XSMP_SEQ_MISMATCH_COUNTER,
+ XSMP_SESSION_TIMEOUT_COUNTER,
+ XSMP_SHUTDOWN_RCVD_COUNTER,
+ XSMP_SHUTDOWN_SENT_COUNTER,
+ XSMP_VNIC_MESSAGE_COUNTER,
+ XSMP_VHBA_MESSAGE_COUNTER,
+ XSMP_USPACE_MESSAGE_COUNTER,
+ XSMP_XVE_MESSAGE_COUNTER,
+ XSMP_SESSION_MESSAGE_COUNTER,
+ XSMP_VNIC_MESSAGE_SENT_COUNTER,
+ XSMP_VHBA_MESSAGE_SENT_COUNTER,
+ XSMP_USPACE_MESSAGE_SENT_COUNTER,
+ XSMP_XVE_MESSAGE_SENT_COUNTER,
+ XSMP_SESSION_MESSAGE_SENT_COUNTER,
+ XSMP_SESSION_RING_FULL_COUNTER,
+ XSMP_SESSION_SEND_ERROR_COUNTER,
+ XSMP_SESSION_CONN_DOWN_COUNTER,
+ XSMP_TOTAL_MSG_SENT_COUNTER,
+ XSMP_CONN_RETRY_COUNTER,
+ XSMP_CONN_FAILED_COUNTER,
+ XSMP_CONN_SUCCESS_COUNTER,
+ XSMP_MAX_COUNTERS
+};
+
+enum {
+ XSMP_SESSION_ERROR,
+ XSMP_SESSION_INIT,
+ XSMP_SESSION_TPT_CONNECTING,
+ XSMP_SESSION_TPT_CONNECTED,
+ XSMP_SESSION_CONNECTING,
+ XSMP_SESSION_CONNECTED,
+ XSMP_SESSION_DISCONNECTING,
+ XSMP_SESSION_DISCONNECTED,
+};
+
+struct xsmp_ctx {
+ spinlock_t lock;
+ int state;
+ atomic_t ref_cnt;
+ unsigned long flags;
+#define XSMP_DELETE_BIT 1
+#define XSMP_SHUTTINGDOWN_BIT 2
+#define XSMP_REG_SENT 3
+#define XSMP_REG_CONFIRM_RCVD 4
+#define XSMP_IBLINK_DOWN 5
+ struct list_head list;
+ struct list_head glist;
+ int idr;
+ unsigned long jiffies;
+ unsigned long hello_jiffies;
+ struct xscore_port *port;
+ struct xscore_conn_ctx conn_ctx;
+ u64 dguid;
+ u16 dlid;
+ struct delayed_work sm_work;
+ int sm_delay;
+ int hello_timeout;
+ struct workqueue_struct *wq;
+ int seq_number;
+ u32 counters[XSMP_MAX_COUNTERS];
+ u32 rcv_seq_number;
+ u32 xsigo_xsmp_version;
+ int datapath_timeout;
+ char chassis_name[CHASSIS_NAME_LEN];
+ char session_name[SESSION_NAME_LEN];
+};
+
+extern void xcpm_xsmp_add_proc_entry(struct xsmp_ctx *xsmp_ctx);
+extern void xcpm_xsmp_remove_proc_entry(struct xsmp_ctx *xsmp_ctx);
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSMP_COMMON_H__
+#define __XSMP_COMMON_H__
+
+/*
+ * Node ID: A 96-bit identifier of the initiating node
+ * The lower part is the 'guid'
+ */
+struct xsmp_node_id {
+ u32 node_id_aux;
+ u64 node_id_primary;
+} __packed;
+
+/*
+ * The XSMP message header
+ *
+ * The message header precedes all XSMP messages from either
+ * the XCM or the server.
+ * 'message_type' identifies the class of the message.
+ * 'seq_number' is a serially incrementing count (different
+ * for each direction) used to track the order of messages.
+ *
+ * This is followed by a series of message objects (of the same
+ * class) adding up to the 'length' field of the header.
+ */
+struct xsmp_message_header {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 seq_number;
+ struct xsmp_node_id source_id;
+ struct xsmp_node_id dest_id;
+} __packed;
+
+#define XSMP_MESSAGE_TYPE_SESSION 1
+#define XSMP_MESSAGE_TYPE_VNIC 2
+#define XSMP_MESSAGE_TYPE_VHBA 3
+#define XSMP_MESSAGE_TYPE_VSSL 4
+#define XSMP_MESSAGE_TYPE_USPACE 5
+#define XSMP_MESSAGE_TYPE_XVE 6
+
+#define XSMP_MESSAGE_TYPE_MAX 8
+
+enum xscore_cap_flags {
+ RESOURCE_FLAG_INDEX_VNIC = 0,
+ RESOURCE_FLAG_INDEX_VHBA = 1,
+ RESOURCE_FLAG_INDEX_VSSL = 2,
+ RESOURCE_FLAG_INDEX_USPACE = 3,
+ RESOURCE_FLAG_INDEX_NO_HA = 4,
+ RESOURCE_FLAG_INDEX_XVE = 6,
+ RESOURCE_FLAG_INDEX_MAX
+};
+
+#define RESOURCE_VNIC (1 << RESOURCE_FLAG_INDEX_VNIC)
+#define RESOURCE_VHBA (1 << RESOURCE_FLAG_INDEX_VHBA)
+#define RESOURCE_VSSL (1 << RESOURCE_FLAG_INDEX_VSSL)
+#define RESOURCE_USPACE (1 << RESOURCE_FLAG_INDEX_USPACE)
+#define RESOURCE_NO_HA (1 << RESOURCE_FLAG_INDEX_NO_HA)
+
+#endif /* __XSMP_COMMON_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSMP_SESSION_H__
+#define __XSMP_SESSION_H__
+
+#include "xsmp_common.h"
+
+/* Session management messages */
+
+/* Session message types */
+enum xsmp_session_cmd_type {
+ XSMP_SESSION_UNUSED = 0,
+
+ /* Heartbeat between the server and XCM */
+ XSMP_SESSION_HELLO,
+
+ /*
+ * Used by the server while initiating a connection to an XCM
+ * 'resource_flags' specify which services are already active
+ */
+ XSMP_SESSION_REGISTER,
+
+ /* Positive reply from XCM in response to a register from server */
+ XSMP_SESSION_REG_CONFIRM,
+
+ /*
+ * Negative reply from XCM in response to a register from server
+ * 'reason_code' specifies the reason for the reject
+ */
+ XSMP_SESSION_REG_REJECT,
+
+ /* Session shutdown message: initiated by either server or XCM */
+ XSMP_SESSION_SHUTDOWN,
+
+ /* List of services that are active: sent by server to XCM */
+ XSMP_SESSION_RESOURCE_LIST,
+
+ /* Set of error counts sent by server to XCM */
+ XSMP_SESSION_ERROR_STATS,
+
+ /*
+ * Secondary timeout value specified by XCM
+ * after which the datapaths are aborted
+ */
+ XSMP_SESSION_STALE_TIME,
+};
+
+#define CHASSIS_NAME_LEN 32
+#define SESSION_NAME_LEN 32
+struct xsmp_session_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 resource_flags;
+ u32 version; /* current driver version */
+ u32 chassis_version; /* chassis sw version
+ * this driver can work with */
+ u32 boot_flags;
+ u64 fw_ver;
+ u32 hw_ver;
+ u32 vendor_part_id;
+ u32 xsigo_xsmp_version;
+ char chassis_name[CHASSIS_NAME_LEN];
+ char session_name[SESSION_NAME_LEN];
+ } __packed;
+ u8 bytes[224];
+ };
+} __packed;
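+
+/*
+ * The union above pads the session message to a fixed 224-byte wire size.
+ * An illustrative compile-time check (not part of the original patch) would
+ * be:
+ *
+ *   BUILD_BUG_ON(sizeof(struct xsmp_session_msg) != 224);
+ */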
+
+enum {
+ RESOURCE_OS_TYPE_LINUX = 0x01000000,
+ RESOURCE_OS_TYPE_VMWARE = 0x02000000,
+ RESOURCE_MS_CLIENT = 0x80000000,
+};
+
+#endif /* __XSMP_SESSION_H__ */
--- /dev/null
+config INFINIBAND_XSVHBA
+ tristate "Xsigo Virtual HBA"
+ depends on INFINIBAND_XSCORE
+ ---help---
+ Support for the Xsigo virtual HBA allowing SAN
+ Connectivity.
+
+config VHBA_DEBUG
+ bool "Xsigo Virtual HBA debugging" if EMBEDDED
+ depends on INFINIBAND_XSVHBA
+ default n
+ ---help---
+ This option causes debugging code to be compiled into the
+ Xsigo xsvhba driver. The output can be turned on via the
+ vhba_debug_level module parameter (which can also be changed
+ after the driver is loaded through sysfs).
--- /dev/null
+obj-$(CONFIG_INFINIBAND_XSVHBA) := xsvhba.o
+xsvhba-y := vhba_main.o vhba_xsmp.o vhba_create.o vhba_init.o vhba_delete.o \
+ vhba_attr.o vhba_wq.o vhba_proc.o vhba_stats.o vhba_ib.o \
+ vhba_scsi_intf.o vhba_align.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+ /* File which houses logic to align cmd->request_buffer sg lists
+ * to make them work with the IB FMR frames
+ * Bugs: This code uses PAGE_SIZE as HCA Page size. This is a horribly
+ * incorrect assumption.
+ */
+#include <linux/version.h>
+#include <linux/blkdev.h>
+
+#include <linux/highmem.h>
+
+#include "xs_compat.h"
+#include "vhba_align.h"
+#include "vhba_os_def.h"
+
+static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen, int to_buffer)
+{
+ struct scatterlist *sg;
+ size_t buf_off = 0;
+ int i;
+
+ for (i = 0, sg = sgl; i < nents; i++, sg++) {
+ struct page *page;
+ int n = 0;
+ unsigned int sg_off = sg->offset;
+ unsigned int sg_copy = sg->length;
+
+ if (sg_copy > buflen)
+ sg_copy = buflen;
+ buflen -= sg_copy;
+
+ while (sg_copy > 0) {
+ unsigned int page_copy;
+ void *p;
+
+ page_copy = PAGE_SIZE - sg_off;
+ if (page_copy > sg_copy)
+ page_copy = sg_copy;
+
+ page = nth_page(sg_page(sg), n);
+ p = kmap_atomic(page);
+
+ if (to_buffer)
+ memcpy(buf + buf_off, p + sg_off, page_copy);
+ else {
+ memcpy(p + sg_off, buf + buf_off, page_copy);
+ flush_kernel_dcache_page(page);
+ }
+
+ kunmap_atomic(p);
+
+ buf_off += page_copy;
+ sg_off += page_copy;
+ if (sg_off == PAGE_SIZE) {
+ sg_off = 0;
+ n++;
+ }
+ sg_copy -= page_copy;
+ }
+
+ if (!buflen)
+ break;
+ }
+
+ return buf_off;
+}
+
+size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen)
+{
+ return sg_copy_buffer(sgl, nents, buf, buflen, 0);
+}
+
+size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
+ void *buf, size_t buflen)
+{
+ return sg_copy_buffer(sgl, nents, buf, buflen, 1);
+}
+
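+/*
+ * Replace an unaligned scatterlist with a single contiguous bounce buffer
+ * allocated via __get_free_pages().  For DMA_TO_DEVICE commands the
+ * scatterlist contents are copied into the buffer here; for DMA_FROM_DEVICE
+ * they are copied back in vhba_tear_bounce_buffer().  Returns the original
+ * scatterlist (which the caller is expected to save in sp->unaligned_sg),
+ * or NULL on allocation failure.
+ */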
+struct scatterlist *vhba_setup_bounce_buffer(struct srb *sp)
+{
+ struct scatterlist *scat;
+ int nents;
+ int total_len = 0;
+ void *memp;
+ struct scatterlist *orig_sg = scsi_sglist(sp->cmd);
+
+ nents = scsi_sg_count(sp->cmd);
+ scat = scsi_sglist(sp->cmd);
+ total_len = scsi_bufflen(sp->cmd);
+
+ memp = (void *)__get_free_pages(GFP_ATOMIC,
+ max(2, get_order(total_len)));
+
+ if (!memp)
+ return NULL;
+
+ if (sp->cmd->sc_data_direction == DMA_TO_DEVICE)
+ sg_copy_to_buffer(scat, nents, memp, total_len);
+
+ /*
+ * Sajid check here, we should not be mucking around with use_sg here
+ */
+ sp->use_sg_orig = scsi_sg_count(sp->cmd);
+ scsi_set_buffer(sp->cmd, memp);
+ sp->bounce_buffer = memp;
+ set_scsi_sg_count(sp->cmd, 0);
+ sp->bounce_buf_len = total_len;
+
+ return orig_sg;
+}
+
+void vhba_tear_bounce_buffer(struct srb *sp)
+{
+ int total_len;
+ void *memp;
+ int nents;
+ struct scatterlist *scat;
+
+ scsi_set_buffer(sp->cmd, sp->unaligned_sg);
+ set_scsi_sg_count(sp->cmd, sp->use_sg_orig);
+
+ nents = scsi_sg_count(sp->cmd);
+ scat = scsi_sglist(sp->cmd);
+ memp = sp->bounce_buffer;
+ total_len = sp->bounce_buf_len;
+
+ if (sp->cmd->sc_data_direction == DMA_FROM_DEVICE)
+ sg_copy_from_buffer(scat, nents, memp, total_len);
+
+ sp->bounce_buffer = NULL;
+ sp->bounce_buf_len = 0;
+ sp->unaligned_sg = NULL;
+
+ free_pages((unsigned long)memp, max(2, get_order(total_len)));
+}
+
+int check_sg_alignment(struct srb *sp, struct scatterlist *sg)
+{
+ int i;
+ int ret = 0;
+ unsigned int sg_offset = SG_OFFSET(sg);
+
+ /*
+ * Check 8 byte alignment only for the first sg entry, since we can
+ * handle an offset for the first entry alone; the rest of the entries
+ * must be 4k (and thus also 8 byte) aligned.
+ */
+ if ((sg_offset + SG_LENGTH(sg)) % PAGE_SIZE) {
+ dprintk(TRC_UNALIGNED, NULL,
+ "Need to copy. SG_LENGTH:%d/scsi_sg_count:%d\n",
+ SG_LENGTH(sg), scsi_sg_count(sp->cmd));
+ ret = 1;
+ goto out;
+ }
+ SG_NEXT(sg);
+
+ /* Check from second entry */
+ for (i = 1; i < scsi_sg_count(sp->cmd); i++, SG_NEXT(sg)) {
+ sg_offset = SG_OFFSET(sg);
+ /* All intermediate sg ptrs should be page (4k) aligned */
+ if (sg_offset) {
+ dprintk(TRC_UNALIGNED, NULL,
+ "ptr %d in sg list needs copy len %d addr ",
+ i, SG_LENGTH(sg));
+ dprintk(TRC_UNALIGNED, NULL, "align %llu\n",
+ (unsigned long long int)
+ (sg_offset & (PAGE_SIZE - 1)));
+ ret = 1;
+ goto out;
+ }
+
+ if ((i != (scsi_sg_count(sp->cmd) - 1))
+ && (SG_LENGTH(sg) % PAGE_SIZE)) {
+ dprintk(TRC_UNALIGNED, NULL,
+ "ptr %d in sg list needs copy len %d\n", i,
+ SG_LENGTH(sg));
+ ret = 1;
+ goto out;
+ }
+
+ }
+out:
+ SG_RESET(sg);
+ return ret;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef _VHBA_ALIGN_H_
+#define _VHBA_ALIGN_H_
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h" /* Only for assert() */
+
+struct scatterlist *vhba_setup_bounce_buffer(struct srb *sp);
+void vhba_tear_bounce_buffer(struct srb *sp);
+int check_sg_alignment(struct srb *, struct scatterlist *);
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "vhba_xsmp.h"
+
+static void vhba_get_host_port_id(struct Scsi_Host *shost)
+{
+}
+
+static void vhba_get_host_speed(struct Scsi_Host *shost)
+{
+ struct virtual_hba *vhba =
+ vhba_get_context_by_idr((u32) *(shost->hostdata));
+ u32 speed = FC_PORTSPEED_4GBIT;
+
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+ return;
+ }
+
+ /* Hard coded for now, but we need this info
+ * sent from the I/O card to us.
+ switch (vhba->speed) {
+ case OFC_SPEED_1GBIT:
+ speed = FC_PORTSPEED_1GBIT;
+ break;
+ case OFC_SPEED_2GBIT:
+ speed = FC_PORTSPEED_2GBIT;
+ break;
+ case OFC_SPEED_4GBIT:
+ speed = FC_PORTSPEED_4GBIT;
+ break;
+ default:
+ speed = FC_PORTSPEED_UNKNOWN;
+ break;
+ }*/
+ fc_host_speed(shost) = speed;
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_port_type(struct Scsi_Host *shost)
+{
+ fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+}
+
+static void vhba_get_host_port_state(struct Scsi_Host *shost)
+{
+ struct virtual_hba *vhba;
+ int link_state;
+
+ vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+ return;
+ }
+
+ link_state = atomic_read(&vhba->ha->link_state);
+ switch (link_state) {
+ case 0:
+ fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+ break;
+ case 1:
+ fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+ break;
+ case 2:
+ fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
+ break;
+ default:
+ fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
+ break;
+ }
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_symbolic_name(struct Scsi_Host *shost)
+{
+ struct virtual_hba *vhba;
+
+ vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+ return;
+ }
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_fabric_name(struct Scsi_Host *shost)
+{
+ struct virtual_hba *vhba;
+ u64 node_name;
+
+ vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+ return;
+ }
+ node_name = vhba->cfg->wwn;
+ fc_host_fabric_name(shost) = node_name;
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_node_name(struct scsi_target *target)
+{
+
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ struct fc_port *fc;
+
+ vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+ if (vhba == NULL) {
+ pr_err("Error: Could not find vhba for this command\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ list_for_each_entry(fc, &ha->disc_ports, list) {
+ if (fc->os_target_id == target->id) {
+ fc_starget_node_name(target) =
+ __be64_to_cpu(*(uint64_t *) fc->node_name);
+ DEC_REF_CNT(vhba);
+ return;
+ }
+ }
+ fc_starget_node_name(target) = -1;
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_port_name(struct scsi_target *target)
+{
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ struct fc_port *fc;
+
+ vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+ if (vhba == NULL) {
+ pr_err("Error: Could not find vhba for this command\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ list_for_each_entry(fc, &ha->disc_ports, list) {
+ if (fc->os_target_id == target->id) {
+ fc_starget_port_name(target) =
+ __be64_to_cpu(*(uint64_t *) fc->port_name);
+ DEC_REF_CNT(vhba);
+ return;
+ }
+ }
+ fc_starget_port_name(target) = -1;
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_port_id(struct scsi_target *target)
+{
+ struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ struct fc_port *fc;
+
+ vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+ if (vhba == NULL) {
+ pr_err("Error: Could not find vhba for this command\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ list_for_each_entry(fc, &ha->disc_ports, list) {
+ if (fc->os_target_id == target->id) {
+ fc_starget_port_id(target) = fc->d_id.b.domain << 16 |
+ fc->d_id.b.area << 8 | fc->d_id.b.al_pa;
+ DEC_REF_CNT(vhba);
+ return;
+ }
+ }
+ fc_starget_port_id(target) = -1;
+ DEC_REF_CNT(vhba);
+}
+
+static void vhba_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
+{
+ if (timeout)
+ rport->dev_loss_tmo = timeout + 5;
+ else
+ rport->dev_loss_tmo = 30; /* Default value XXX revisit */
+
+}
+
+struct fc_host_statistics *vhba_get_fc_host_stats(struct Scsi_Host *shp)
+{
+ return NULL;
+}
+
+struct fc_function_template vhba_transport_functions = {
+
+ .show_host_node_name = 1,
+ .show_host_port_name = 1,
+ .show_host_supported_classes = 1,
+ .show_host_supported_fc4s = 1,
+ .show_host_active_fc4s = 1,
+
+ .get_host_port_id = vhba_get_host_port_id,
+ .show_host_port_id = 1,
+ .get_host_speed = vhba_get_host_speed,
+ .show_host_speed = 1,
+ .get_host_port_type = vhba_get_host_port_type,
+ .show_host_port_type = 1,
+ .get_host_port_state = vhba_get_host_port_state,
+ .show_host_port_state = 1,
+ .get_host_symbolic_name = vhba_get_host_symbolic_name,
+ .show_host_symbolic_name = 1,
+
+ .dd_fcrport_size = sizeof(struct os_tgt),
+ .show_rport_supported_classes = 1,
+
+ .get_host_fabric_name = vhba_get_host_fabric_name,
+ .show_host_fabric_name = 1,
+ .get_starget_node_name = vhba_get_starget_node_name,
+ .show_starget_node_name = 1,
+ .get_starget_port_name = vhba_get_starget_port_name,
+ .show_starget_port_name = 1,
+ .get_starget_port_id = vhba_get_starget_port_id,
+ .show_starget_port_id = 1,
+ .set_rport_dev_loss_tmo = vhba_set_rport_loss_tmo,
+ .show_rport_dev_loss_tmo = 1,
+ .get_fc_host_stats = vhba_get_fc_host_stats,
+
+};
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * vhba_create.c
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_scsi_intf.h"
+
+#include <scsi/scsi_transport_fc.h>
+
+static u32 xg_vhba_mem_alloc(struct virtual_hba *);
+
+int vhba_create(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg)
+{
+ struct virtual_hba *vhba;
+ struct Scsi_Host *host;
+ struct scsi_xg_vhba_host *ha;
+ struct vhba_xsmp_msg *msg1;
+ uint32_t mtu;
+ u32 i;
+ int ret;
+ int vhba_xsmp_msg_len = sizeof(struct vhba_xsmp_msg);
+ enum vhba_xsmp_error_codes nack_code = VHBA_NACK_GENERAL_ERROR;
+
+ vhba = kzalloc(sizeof(struct virtual_hba), GFP_ATOMIC);
+ if (!vhba) {
+ eprintk(NULL, "vhba alloc failed\n");
+ vhba_xsmp_nack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len,
+ VHBA_NACK_ALLOC_ERROR);
+ return 1;
+ }
+
+ atomic_set(&vhba->ref_cnt, 0);
+ atomic_set(&vhba->vhba_state, VHBA_STATE_NOT_ACTIVE);
+
+ init_waitqueue_head(&vhba->timer_wq);
+ init_waitqueue_head(&vhba->delete_wq);
+
+ ret = vhba_create_context(msg, vhba);
+
+ if (ret == 0) {
+ /*
+ * Duplicate vHBA, probably due to previous sync operation
+ */
+ dprintk(TRC_XSMP_ERRS, NULL,
+ "VHBA with resource_id <0x%Lx> exists, ",
+ msg->resource_id);
+ dprintk(TRC_XSMP_ERRS, NULL, "not installing\n");
+ vhba->xsmp_hndl = xsmp_hndl;
+ vhba_xsmp_ack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len);
+ vhba_xsmp_notify(xsmp_hndl, msg->resource_id,
+ XSMP_VHBA_OPER_UP);
+ kfree(vhba);
+ return 0;
+ } else if (ret == -1) {
+ eprintk(NULL, "mem alloc failed\n");
+ nack_code = VHBA_NACK_ALLOC_ERROR;
+ goto err_ret_5;
+ } else if (ret != 1) {
+ eprintk(NULL, "Error: unable to create context [%s]\n",
+ msg->vh_name);
+ goto err_ret_5;
+ }
+
+ mtu = msg->mtu;
+
+ if (mtu == 0)
+ mtu = 256; /* 256KB */
+ if (mtu > 2040) /* 2MB - 8KB */
+ mtu = 2040;
+ dprintk(TRC_XSMP, NULL, "mtu size=%d\n", mtu);
+
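+ /*
+ * Size the FMR limits from the configured mtu (in KB).  For example, with
+ * a 4 KiB PAGE_SIZE and the default mtu of 256 this gives 64 data segments
+ * per FMR, 66 FMR pages and a 512-sector maximum transfer size.
+ */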
+ vhba_max_dsds_in_fmr = (mtu * 1024) / PAGE_SIZE;
+ vhba_max_fmr_pages = ((mtu * 1024) / PAGE_SIZE) + 2;
+ vhba_max_transfer_size = (mtu * 1024) / 512;
+
+ xg_vhba_driver_template.sg_tablesize = vhba_max_dsds_in_fmr;
+
+ if (vhba_max_transfer_size != VHBA_DEFAULT_TRANSFER_SIZE)
+ xg_vhba_driver_template.max_sectors = vhba_max_transfer_size;
+
+ host = scsi_host_alloc(&xg_vhba_driver_template, sizeof(int));
+
+ if (host == NULL) {
+ eprintk(NULL, "scsi host alloc failed\n");
+ nack_code = VHBA_NACK_ALLOC_ERROR;
+ goto err_ret_5;
+ }
+
+ ha = kzalloc(sizeof(struct scsi_xg_vhba_host), GFP_ATOMIC);
+ if (!ha) {
+ eprintk(NULL, "Ha alloc failed\n");
+ nack_code = VHBA_NACK_ALLOC_ERROR;
+ goto err_ret_4;
+ }
+ ha->host = host;
+ ha->host_no = host->host_no;
+ sprintf(ha->host_str, "%ld", ha->host_no);
+
+ spin_lock_init(&ha->io_lock);
+
+ /* Initialize proc related counters */
+ ha->stats.io_stats.total_io_rsp = 0;
+ ha->stats.io_stats.total_read_reqs = 0;
+ ha->stats.io_stats.total_write_reqs = 0;
+ ha->stats.io_stats.total_task_mgmt_reqs = 0;
+ ha->stats.io_stats.total_read_mbytes = 0;
+ ha->stats.io_stats.total_write_mbytes = 0;
+ ha->stats.io_stats.total_copy_ios = 0;
+ ha->stats.io_stats.total_copy_page_allocs = 0;
+ ha->stats.io_stats.total_copy_page_frees = 0;
+
+ for (i = 0; i < VHBA_MAX_VH_Q_COUNT; i++) {
+ atomic_set(&ha->stats.io_stats.num_vh_q_reqs[i], 0);
+ atomic_set(&ha->stats.io_stats.vh_q_full_cnt[i], 0);
+ }
+
+ ha->ports = MAX_BUSES;
+ ha->request_q_length = REQUEST_ENTRY_CNT_24XX;
+ host->can_queue = vhba_max_q_depth;
+ if ((vhba_max_q_depth > 64) || (vhba_max_q_depth < 1)) {
+ /*
+ * Looks like a bogus value, set it to default (16).
+ */
+ host->can_queue = VHBA_MAX_VH_Q_DEPTH;
+ }
+ ha->data_qp_handle = 0;
+ ha->control_qp_handle = 0;
+ atomic_set(&ha->qp_status, VHBA_QP_NOTCONNECTED);
+
+ for (i = 0; i < REQUEST_ENTRY_CNT_24XX; i++)
+ ha->send_buf_ptr[i] = NULL;
+
+ spin_lock_init(&ha->list_lock);
+ INIT_LIST_HEAD(&ha->disc_ports);
+ INIT_LIST_HEAD(&ha->defer_list);
+ atomic_set(&ha->periodic_def_cnt, 0);
+
+ dprintk(TRC_XSMP, NULL, "create_vhba: new vhba = %p\n", (void *)vhba);
+
+ *(host->hostdata) = (int)vhba->idr;
+ vhba->ha = ha;
+ ha->vhba = vhba;
+ ha->max_tgt_id = 0;
+ ha->max_targets = 0;
+ ha->tca_guid = be64_to_cpu(msg->tca_guid);
+ ha->tca_lid = be16_to_cpu(msg->tca_lid);
+
+ vhba->xsmp_hndl = xsmp_hndl;
+ vhba->scanned_once = 0;
+ vhba->scan_reqd = 0;
+ vhba->sync_needed = 0;
+ vhba->ha->sync_flag = 1;
+ vhba->reconn_try_cnt = 0;
+
+ xcpm_get_xsmp_session_info(xsmp_hndl, &vhba->xsmp_info);
+
+ if (msg->vhba_flag & 0x1)
+ dprintk(TRC_XSMP, NULL, "This is a boot vhba\n");
+
+ if ((msg->vhba_flag & 0x1) == 0x0)
+ dprintk(TRC_XSMP, NULL, "This is a regular vhba\n");
+
+ if (xg_vhba_mem_alloc(vhba)) {
+ eprintk(vhba, "failure in xg_vhba_mem_alloc\n");
+ nack_code = VHBA_NACK_ALLOC_ERROR;
+ goto err_ret_2;
+ }
+
+ msg1 = kmalloc(sizeof(struct vhba_xsmp_msg), GFP_ATOMIC);
+
+ if (!msg1) {
+ eprintk(vhba, "kmalloc for vhba xsmp msg failed\n");
+ nack_code = VHBA_NACK_ALLOC_ERROR;
+ goto err_ret_1;
+ }
+
+ memcpy(msg1, msg, sizeof(struct vhba_xsmp_msg));
+ vhba->cfg = msg1;
+ ha->resource_id = msg->resource_id;
+ vhba->resource_id = msg->resource_id;
+ dprintk(TRC_INFO, vhba, "resource id is %Lx\n", msg->resource_id);
+ host->this_id = 255;
+ host->cmd_per_lun = cmds_per_lun;
+ host->max_cmd_len = MAX_CMDSZ;
+ host->max_channel = ha->ports - 1;
+ if (vhba->cfg->lunmask_enable)
+ ha->max_luns = 256;
+ else
+ ha->max_luns = MAX_FIBRE_LUNS_MORE;
+ host->max_lun = ha->max_luns;
+ host->unique_id = ha->instance;
+ dprintk(TRC_XSMP, vhba,
+ "detect hba %ld at address = %p\n", ha->host_no, ha);
+
+ /* Use the VMware consistent naming convention & register the
+ * device as an FC-capable transport. This FC-transport template
+ * needs to be pre-registered, typically during module init. */
+ host->transportt = vhba_transport_template;
+ host->max_channel = 0;
+ host->max_lun = MAX_LUNS - 1; /*0xffff-1 */
+ host->max_id = MAX_TARGETS;
+ ha->flags.init_done = 1;
+ ret = scsi_add_host(host, NULL);
+ if (ret) {
+ pr_err("scsi_add_host failed: ret = %d\n", ret);
+ goto err_ret_1;
+ }
+
+ {
+ u64 port_name = wwn_to_u64((u8 *) &(vhba->cfg->wwn));
+ /* Hard coding the node name isn't right, but doing it for now */
+ u64 node_name = port_name | 0x100000000;
+
+ fc_host_node_name(host) = node_name;
+ fc_host_port_name(host) = port_name;
+
+ }
+
+ if (vhba_initialize(vhba, msg1)) {
+ eprintk(vhba, "scsi(%ld): Failed to initialize adapter -\n"
+ "Adapter flags %x.\n", ha->host_no, ha->device_flags);
+ goto err_ret_0;
+ }
+
+ vhba_xsmp_ack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len);
+ sprintf((char *)ha->vhba_name, "vhba:%p", vhba);
+ vhba_add_proc_entry(vhba);
+ vhba_add_target_proc_entry(vhba);
+ vhba_add_context(vhba);
+
+ /* Any VHBA context setting, data & control IB queue pairs, etc.. */
+ ret = vhba_conn_init(vhba);
+ if (ret) {
+ eprintk(vhba, "Trouble doing Conn Init. Returning %d\n", ret);
+ vhba_remove_context(vhba->resource_id);
+ goto err_ret_0;
+ }
+ ret = vhba_ib_connect_qp(vhba);
+ if (ret) {
+ eprintk(vhba, "Trouble Setting up Conn. Returning %d\n", ret);
+ vhba_remove_context(vhba->resource_id);
+ goto err_ret_0;
+ }
+ atomic_inc(&vhba_count);
+ return 0;
+
+err_ret_0:
+ scsi_host_put(host);
+err_ret_1:
+ kfree(vhba->cfg);
+err_ret_2:
+ kfree(ha);
+err_ret_4:
+err_ret_5:
+ kfree(vhba);
+ vhba_xsmp_nack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len, nack_code);
+ return -1;
+}
+
+void xg_vhba_free_device(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ if (ha->request_ring) {
+ ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+ ha->request_q_length *
+ sizeof(struct cmd_type_7),
+ ha->request_ring, ha->request_dma);
+ dprintk(TRC_XSMP, vhba,
+ "called ib_dma_free_coherent for req ring\n");
+ } else
+ dprintk(TRC_XSMP_ERRS, vhba, "request ring already NULL!\n");
+
+ if (ha->req_ring_rindex) {
+ ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+ sizeof(u32), ha->req_ring_rindex,
+ ha->req_ring_rindex_dma);
+ dprintk(TRC_XSMP, vhba,
+ "called dma_free_coherent for req ring rindex\n");
+ } else
+ dprintk(TRC_XSMP_ERRS, vhba, "request ring ptr already NULL\n");
+}
+
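+/*
+ * Find a free slot in the outstanding-commands array, scanning circularly
+ * from the current index.  Returns the slot index, or -1 (after bumping the
+ * request-queue-full counter) when no slot is free.
+ */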
+int get_outstding_cmd_entry(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int start_cnt = ha->current_outstanding_cmd;
+ int curr_cnt = ha->current_outstanding_cmd;
+
+ while ((curr_cnt < MAX_OUTSTANDING_COMMANDS)) {
+ if (ha->outstanding_cmds[curr_cnt] == NULL) {
+ ha->current_outstanding_cmd = curr_cnt;
+ return curr_cnt;
+ } else
+ curr_cnt++;
+ }
+
+ ha->stats.ib_stats.total_outstding_q_wraps++;
+ curr_cnt = 0;
+ while (curr_cnt < start_cnt) {
+ if (ha->outstanding_cmds[curr_cnt] == NULL) {
+ ha->current_outstanding_cmd = curr_cnt;
+ return curr_cnt;
+ } else
+ curr_cnt++;
+ }
+
+ ha->stats.ib_stats.total_req_q_fulls++;
+ return -1;
+}
+
+static u32 xg_vhba_mem_alloc(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ ha->request_ring = ib_dma_alloc_coherent(vhba->xsmp_info.ib_device,
+ ha->request_q_length *
+ sizeof(struct cmd_type_7),
+ &ha->request_dma, GFP_KERNEL);
+ if (ha->request_ring == NULL) {
+ eprintk(vhba, "alloc failed for req ring\n");
+ return 1;
+ }
+
+ ha->req_ring_rindex = ib_dma_alloc_coherent(vhba->xsmp_info.ib_device,
+ sizeof(u32),
+ &ha->req_ring_rindex_dma,
+ GFP_KERNEL);
+ if (ha->req_ring_rindex == NULL) {
+ ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+ ha->request_q_length *
+ sizeof(struct cmd_type_7),
+ ha->request_ring, ha->request_dma);
+ eprintk(vhba, "alloc failed for req ring rindex\n");
+ return 1;
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_DEFS_H__
+#define __VHBA_DEFS_H__
+
+#include <linux/types.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+
+#define VHBA_VALIDATE_STATE(vhba) \
+{ \
+ if (atomic_read(&vhba->ha->qp_status) != VHBA_QP_CONNECTED) { \
+ dprintk(0, vhba, \
+ "Error - QPs not connected!\n"); \
+ ret_error = 1; \
+ } \
+}
+
+extern int vhba_abort_recovery_count;
+extern struct scsi_transport_template *vhba_transport_template;
+extern int vhba_max_dsds_in_fmr;
+extern int vhba_max_fmr_pages;
+extern int hba_offset;
+extern int force_sp_copy;
+extern int vhba_use_fmr;
+extern int boot_vhba_use_fmr;
+extern struct scsi_host_template xg_vhba_driver_template;
+extern int cmds_per_lun;
+extern int vhba_max_transfer_size;
+extern int vhba_max_scsi_retry;
+extern int vhba_initialize(struct virtual_hba *vhba, struct vhba_xsmp_msg *msg);
+extern int vhba_add_proc_entry(struct virtual_hba *vhba);
+extern int vhba_add_target_proc_entry(struct virtual_hba *vhba);
+extern int vhba_remove_target_proc_entry(struct virtual_hba *vhba);
+extern void vhba_remove_proc_entry(struct virtual_hba *vhba);
+extern void add_to_defer_list(struct scsi_xg_vhba_host *ha, struct srb *sp);
+extern int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr,
+ int num_pgs, u64 *mapped_fmr_iova, struct srb *sp,
+ int index);
+extern void extend_timeout(struct scsi_cmnd *cmd, struct srb *sp, int timeout);
+extern void ib_link_down(struct scsi_xg_vhba_host *ha);
+extern void ib_link_dead_poll(struct scsi_xg_vhba_host *ha);
+extern int vhba_send_heart_beat(struct virtual_hba *vhba);
+extern int check_number_of_vhbas_provisioned(void);
+extern int vhba_check_discs_status(void);
+
+int vhba_create_procfs_root_entries(void);
+void vhba_remove_procfs_root_entries(void);
+ssize_t vhba_read(struct file *, char *, size_t, loff_t *);
+ssize_t vhba_write(struct file *, const char *, size_t, loff_t *);
+int vhba_open(struct inode *, struct file *);
+int vhba_release(struct inode *, struct file *);
+int vhba_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+
+void vhba_internal_processing(void);
+
+/*
+ * Globals
+ */
+extern struct semaphore vhba_init_sem;
+extern int vhba_ready;
+extern struct timer_list vhba_init_timer;
+extern int vhba_init_timer_on;
+
+extern struct semaphore vhba_cmd_sem;
+extern int vhba_cmd_done;
+extern struct timer_list vhba_cmd_timer;
+extern int vhba_cmd_timer_on;
+
+extern int bench_target_count;
+extern int vhba_multiple_q;
+
+#define VHBA_RECONN_INTERVAL 5
+#define MAX_IOCBS_IN_VH 2
+
+extern struct proc_dir_entry *proc_root_vhba;
+extern struct proc_dir_entry *proc_root_vhba_dev;
+extern struct proc_dir_entry *proc_root_vhba_targ;
+
+int vhba_print_io_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_ib_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_xsmp_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fmr_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fc_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_scsi_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+
+extern char vhba_version_str[40];
+extern int vhba_xsmp_service_id;
+extern struct service_type_info service_info;
+extern struct vhba_xsmp_stats vhba_xsmp_stats;
+
+extern int init_status;
+extern int dev_major;
+extern int vhba_ready;
+extern struct timer_list vhba_init_timer;
+extern int vhba_init_timer_on;
+extern struct vhba_discovery_msg disc_info;
+extern struct vhba_io_cmd vhba_io_cmd_o;
+
+void xg_vhba_free_device(struct virtual_hba *);
+int vhba_send_init_blk(struct virtual_hba *);
+int vhba_send_enable_vhba(struct virtual_hba *);
+int vhba_send_vhba_write_index(int);
+int send_abort_command(int, struct srb *sp, unsigned int t);
+int send_device_reset(int, unsigned int t);
+int send_link_reset(int);
+int vhbawq_init(void);
+int vhbawq_queue(void);
+int vhbawq_cleanup(void);
+
+#endif /* __VHBA_DEFS_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+
+#include "vhba_ib.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_scsi_intf.h"
+
+/* Get the driver IO-Lock for use here. */
+
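+/*
+ * vhba_delete - tear down the vHBA identified by resource_id.
+ * Disconnects the control/data QPs, purges pending I/Os, waits for
+ * outstanding references to drop, notifies the XSMP layer, and frees
+ * per-target and per-LUN state before releasing the host structures.
+ */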
+int vhba_delete(u64 resource_id)
+{
+ int i = 0, j = 0;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha = NULL;
+
+ vhba = vhba_remove_context(resource_id);
+
+ if (vhba == NULL) {
+ dprintk(TRC_XSMP_ERRS, NULL, "Non existent vhba\n");
+ return -EINVAL;
+ }
+
+ ha = vhba->ha;
+
+ /* Flush deferred list */
+ if (atomic_read(&ha->ib_status) == VHBA_IB_DOWN) {
+ atomic_set(&ha->ib_link_down_cnt, 0);
+ atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+ }
+
+ dprintk(TRC_XSMP, vhba, "disconnecting qps for vhba %p\n", vhba);
+ vhba_ib_disconnect_qp(vhba);
+ dprintk(TRC_XSMP, vhba, "purging ios for vhba %p\n", vhba);
+ vhba_purge_pending_ios(vhba);
+
+ dprintk(TRC_XSMP, vhba, "uniniting QP connections\n");
+ xscore_conn_destroy(&vhba->ctrl_conn.ctx);
+ xscore_conn_destroy(&vhba->data_conn.ctx);
+
+ atomic_set(&vhba->vhba_state, VHBA_STATE_NOT_ACTIVE);
+
+ if (atomic_read(&vhba->ref_cnt)) {
+ int err;
+
+ dprintk(TRC_XSMP, NULL,
+ "%s(): vhba %p has ref_cnt %d, waiting for it to drop\n",
+ __func__, vhba, atomic_read(&vhba->ref_cnt));
+
+ err = wait_event_timeout(vhba->delete_wq,
+ !atomic_read(&vhba->ref_cnt), 30 * HZ);
+ if (err == 0) {
+ eprintk(vhba, "vhba_delete: ref_cnt %d is non zero\n",
+ atomic_read(&vhba->ref_cnt));
+ return -EIO;
+ }
+ }
+
+ dprintk(TRC_XSMP, NULL, "setting oper state dn\n");
+ vhba_xsmp_notify(vhba->xsmp_hndl,
+ vhba->resource_id, XSMP_VHBA_OPER_DOWN);
+
+ vhba_remove_proc_entry(vhba);
+ vhba_remove_target_proc_entry(vhba);
+
+ for (i = 0; i < REQUEST_ENTRY_CNT_24XX; i++) {
+ if (ha->send_buf_ptr[i] != NULL) {
+ kfree(ha->send_buf_ptr[i]);
+ ha->send_buf_ptr[i] = NULL;
+ }
+ }
+ xg_vhba_free_device(vhba);
+ vhba_scsi_release(vhba);
+
+ /*
+ * Free memory allocated for tgts/lun's etc.
+ */
+ for (i = 0; i < MAX_FIBRE_TARGETS; i++) {
+ if (TGT_Q(ha, i)) {
+ dprintk(TRC_XSMP, NULL, "freeing tgt %d\n", i);
+ for (j = 0; j < MAX_FIBRE_LUNS; j++) {
+ if (LUN_Q(ha, i, j)) {
+ dprintk(TRC_XSMP,
+ NULL, "freeing lun %d\n", j);
+ if (LUN_Q(ha, i, j)->fclun) {
+ kfree(LUN_Q(ha, i, j)->fclun);
+ LUN_Q(ha, i, j)->fclun = NULL;
+ }
+ kfree(LUN_Q(ha, i, j));
+ LUN_Q(ha, i, j) = NULL;
+ }
+ } /* end free all lun's under the tgt */
+ if (TGT_Q(ha, i)->fcport) {
+ kfree(TGT_Q(ha, i)->fcport);
+ TGT_Q(ha, i)->fcport = NULL;
+ }
+ kfree(TGT_Q(ha, i));
+ TGT_Q(ha, i) = NULL;
+ }
+
+ } /* end free all tgts */
+
+ kfree(ha);
+ kfree(vhba);
+ atomic_dec(&vhba_count);
+
+ return 0;
+} /* vhba_delete() */
+
+int vhba_scsi_release(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ dprintk(TRC_XSMP, vhba, "deleting scsi host for vhba %p\n", vhba);
+
+ fc_remove_host(ha->host);
+
+ scsi_remove_host(ha->host);
+
+ scsi_host_put(ha->host);
+
+ vhba_dealloc_fmr_pool(vhba);
+
+ if (vhba->cfg != NULL) {
+ kfree(vhba->cfg);
+ vhba->cfg = NULL;
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+
+#include <linux/version.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <rdma/ib_verbs.h>
+
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+
+void vhba_data_send_comp_handler(void *client_arg, void *msg, int status,
+ int n);
+
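+/*
+ * Called once from each of the control and data QP callbacks: the first
+ * completion moves the QP state to VHBA_QP_PARTIAL_CONNECT, the second to
+ * VHBA_QP_CONNECTED, at which point the init block and enable messages
+ * are sent.
+ */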
+void vhba_connection_setup(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int qp_status, ret;
+
+ qp_status = atomic_read(&ha->qp_status);
+
+ switch (qp_status) {
+ case VHBA_QP_RECONNECTING:
+ atomic_set(&ha->qp_status, VHBA_QP_PARTIAL_CONNECT);
+ break;
+ case VHBA_QP_PARTIAL_CONNECT:
+ atomic_set(&ha->qp_status, VHBA_QP_CONNECTED);
+ dprintk(TRC_INFO, vhba, "QP is connected\n");
+ vhba->reconn_attempt = 0;
+ vhba->qp_count++;
+ atomic_set(&ha->ib_status, VHBA_IB_UP);
+ dprintk(TRC_IB, vhba, "setting oper state up\n");
+ vhba_xsmp_notify(vhba->xsmp_hndl,
+ vhba->resource_id, XSMP_VHBA_OPER_UP);
+ break;
+ default:
+ eprintk(vhba,
+ "Error - Unexpected QP state detected %d\n", qp_status);
+ return;
+ } /* end switch */
+
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+ dprintk(TRC_INFO, vhba, "sending init blk\n");
+ ret = vhba_send_init_blk(vhba);
+ if (ret)
+ eprintk(vhba, "sending init blk failed\n");
+ dprintk(TRC_INFO, vhba, "sending enable vhba\n");
+ ret = vhba_send_enable_vhba(vhba);
+ if (ret)
+ eprintk(vhba, "sending enable vhba failed\n");
+ }
+}
+
+void vhba_control_callback(void *context, int event)
+{
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = (struct virtual_hba *)vhba_get_context_by_idr((u32)
+ (unsigned long)
+ context);
+ if (!vhba) {
+ eprintk(NULL, "Invalid context\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ switch (event) {
+ case XSCORE_CONN_RDISCONNECTED:
+ case XSCORE_CONN_LDISCONNECTED:
+ dprintk(TRC_IB, vhba, "Received Control Disconnect\n");
+ ha->stats.ib_stats.cqp_remote_disconn_cnt++;
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ break;
+ case XSCORE_CONN_CONNECTED:
+ dprintk(TRC_IB, vhba, "Control Is Connected\n");
+ ha->stats.ib_stats.cqp_up_cnt++;
+ ha->control_qp_handle = XSCORE_CONN_CONNECTED;
+ vhba_connection_setup(vhba);
+ break;
+ case XSCORE_CONN_ERR:
+ ib_link_down(ha);
+ ha->control_qp_handle = XSCORE_CONN_ERR;
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ break;
+ default:
+ break;
+ }
+ DEC_REF_CNT(vhba);
+
+}
+
+void vhba_data_callback(void *context, int event)
+{
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = (struct virtual_hba *)vhba_get_context_by_idr((u32)
+ (unsigned long)
+ context);
+ if (!vhba) {
+ eprintk(NULL, "Invalid COntext\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ switch (event) {
+ case XSCORE_CONN_RDISCONNECTED:
+ case XSCORE_CONN_LDISCONNECTED:
+ dprintk(TRC_IB, vhba, "Received Data Disconnect\n");
+ ha->stats.ib_stats.dqp_remote_disconn_cnt++;
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ break;
+ case XSCORE_CONN_CONNECTED:
+ dprintk(TRC_IB, vhba, "Data Connected\n");
+ ha->data_qp_handle = XSCORE_CONN_CONNECTED;
+ vhba_connection_setup(vhba);
+ break;
+ case XSCORE_CONN_ERR:
+ ib_link_down(ha);
+ ha->data_qp_handle = XSCORE_CONN_ERR;
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ break;
+ default:
+ break;
+ }
+ DEC_REF_CNT(vhba);
+
+}
+
+int vhba_conn_init(struct virtual_hba *vhba)
+{
+ struct xsvhba_conn *cp = &vhba->ctrl_conn;
+ struct xscore_conn_ctx *cctx = &cp->ctx;
+ struct xt_cm_private_data *cmp;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int ret;
+
+ /*
+ * Control connection
+ */
+ cp->type = QP_TYPE_CONTROL;
+ cctx->tx_ring_size = 8;
+ cctx->rx_ring_size = 8;
+ cctx->rx_buf_size = VHBA_CQP_MAX_BUF_SIZE;
+ cctx->client_arg = (void *)(unsigned long)(vhba->idr);
+ cctx->event_handler = vhba_control_callback;
+ cctx->alloc_buf = 0;
+ cctx->free_buf = 0;
+ cctx->send_compl_handler = vhba_ctrl_send_comp_handler;
+ cctx->recv_msg_handler = vhba_cqp_recv_comp_handler;
+ cctx->dguid = ha->tca_guid;
+ cctx->dlid = ha->tca_lid;
+ cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+ cctx->features = XSCORE_DONT_FREE_SENDBUF;
+
+ cmp = (struct xt_cm_private_data *)cctx->priv_data;
+ cmp->vid = cpu_to_be64(vhba->resource_id);
+ cmp->qp_type = cpu_to_be16(QP_TYPE_CONTROL);
+ cmp->data_qp_type = 0;
+ cctx->priv_data_len = sizeof(*cmp);
+
+ ret = xscore_conn_init(cctx, vhba->xsmp_info.port);
+ if (ret) {
+ eprintk(vhba, "xscore_conn_init ctrl error for VID %llx %d\n",
+ vhba->resource_id, ret);
+ return ret;
+ }
+
+ cp = &vhba->data_conn;
+ cctx = &cp->ctx;
+
+ cp->type = QP_TYPE_DATA;
+ cctx->tx_ring_size = VHBA_DQP_SEND_Q_SZ;
+ cctx->rx_ring_size = VHBA_DQP_RECV_Q_SZ;
+ cctx->rx_buf_size = VHBA_DQP_MAX_BUF_SIZE;
+ cctx->client_arg = (void *)(unsigned long)(vhba->idr);
+ cctx->event_handler = vhba_data_callback;
+ cctx->alloc_buf = 0;
+ cctx->free_buf = 0;
+ cctx->send_compl_handler = vhba_data_send_comp_handler;
+ cctx->recv_msg_handler = vhba_recv_comp_handler;
+ cctx->dguid = ha->tca_guid;
+ cctx->dlid = ha->tca_lid;
+ cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+ cctx->features = XSCORE_RDMA_SUPPORT | XSCORE_DONT_FREE_SENDBUF;
+
+ cmp = (struct xt_cm_private_data *)cctx->priv_data;
+ cmp->vid = cpu_to_be64(vhba->resource_id);
+ cmp->qp_type = cpu_to_be16(QP_TYPE_DATA);
+ cmp->data_qp_type = 0;
+ cctx->priv_data_len = sizeof(*cmp);
+
+ ret = xscore_conn_init(cctx, vhba->xsmp_info.port);
+ if (ret) {
+ eprintk(vhba, "xscore_conn_init data error for VID %llx %d\n",
+ vhba->resource_id, ret);
+
+ xscore_conn_destroy(&vhba->ctrl_conn.ctx);
+ }
+ return ret;
+}
+
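+/*
+ * Bring up the data QP first and then the control QP; any failure marks
+ * the IB link down and leaves the vHBA in VHBA_QP_TRYCONNECTING so the
+ * connection can be retried.
+ */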
+int vhba_ib_connect_qp(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ int ret = 0;
+
+ /* Create Control queue pair with the destination TCA */
+ if ((atomic_read(&ha->qp_status) == VHBA_QP_PARTIAL_CONNECT) ||
+ (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED)) {
+ dprintk(TRC_IB_ERRS, vhba, "Error - Invalid qp state: %d\n",
+ atomic_read(&ha->qp_status));
+ ret = 1;
+ goto out;
+ }
+
+ atomic_set(&ha->qp_status, VHBA_QP_RECONNECTING);
+
+ ret = xscore_conn_connect(&vhba->data_conn.ctx, 0);
+
+ if (ret) {
+ eprintk(vhba, "Data QP Connect failed: ret = %d\n", ret);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ goto out;
+ }
+
+ ret = xscore_conn_connect(&vhba->ctrl_conn.ctx, 0);
+
+ if (ret) {
+ eprintk(vhba, "Control QP Connect failed: ret = %d\n", ret);
+ xscore_conn_disconnect(&vhba->data_conn.ctx, 0);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+
+}
+
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ if (ha->control_qp_handle == XSCORE_CONN_CONNECTED) {
+ dprintk(TRC_IB, vhba, "Disconnecting Control\n");
+ xscore_conn_disconnect(&vhba->ctrl_conn.ctx, 0);
+ }
+
+ if (ha->data_qp_handle == XSCORE_CONN_CONNECTED) {
+ dprintk(TRC_IB, vhba, "Disconnecting Data\n");
+ xscore_conn_disconnect(&vhba->data_conn.ctx, 0);
+ }
+
+ atomic_set(&ha->qp_status, VHBA_QP_NOTCONNECTED);
+ return 0;
+}
+
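+/*
+ * Create the FMR pool used to map SCSI buffers for RDMA. The page shift
+ * is derived from the device's page_size_cap and clamped to at least 4K.
+ */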
+int vhba_alloc_fmr_pool(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct ib_device_attr dev_attr;
+ int ret;
+ int page_shift = 0;
+ struct ib_fmr_pool_param pool_params = {
+ .max_pages_per_fmr = vhba_max_fmr_pages,
+ .access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
+ .pool_size = VHBA_FMR_POOL_SIZE,
+ .dirty_watermark = VHBA_FMR_DIRTY_MARK,
+ .flush_function = 0,
+ .flush_arg = 0,
+ .cache = 1
+ };
+
+ ret = ib_query_device(vhba->xsmp_info.ib_device, &dev_attr);
+ if (ret) {
+ eprintk(vhba, "query_device error %d\n", ret);
+ return -1;
+ }
+
+ page_shift = ffs(dev_attr.page_size_cap) - 1;
+ if (page_shift < 0) {
+ /* Fall back to the system page shift */
+ page_shift = PAGE_SHIFT;
+ dprintk(TRC_IB_ERRS, vhba,
+ "ib_query_device returned a page_size_cap of 0\n");
+ }
+ page_shift = max(12, page_shift);
+
+ dprintk(TRC_IB, vhba, "Using page shift: %d\n", page_shift);
+
+ pool_params.page_shift = page_shift;
+
+ /*
+ * Allocate an fmr pool, assuming that the pd has been obtained
+ * before the call
+ */
+ ha->fmr_pool = ib_create_fmr_pool(vhba->xsmp_info.pd, &pool_params);
+
+ if (IS_ERR(ha->fmr_pool) || (!ha->fmr_pool)) {
+ ha->fmr_pool = NULL;
+ dprintk(TRC_IB_ERRS, vhba, "ib_create_fmr_pool failed\n");
+ return -1;
+ }
+ return 0;
+}
+
+void vhba_dealloc_fmr_pool(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ if (ha->fmr_pool) {
+ ib_destroy_fmr_pool(ha->fmr_pool);
+ ha->fmr_pool = 0;
+ } else {
+ dprintk(TRC_IB_ERRS, vhba, "fmr pool ptr is null!\n");
+ }
+}
+
+int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr, int num_pgs,
+ u64 *mapped_fmr_iova, struct srb *sp, int index)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ if (!ha->fmr_pool) {
+ eprintk(vhba, "Error - null fmr pool ptr\n");
+ ha->stats.fmr_stats.map_fail_cnt++;
+ /* Revisit: Correct return value is -1 */
+ return 0;
+ }
+ sp->pool_fmr[index] = ib_fmr_pool_map_phys(ha->fmr_pool,
+ phys_addr, num_pgs,
+ *mapped_fmr_iova, NULL);
+
+ if (IS_ERR(sp->pool_fmr[index])) {
+ eprintk(vhba, "Error - pool fmr index map failed [%ld/%p]\n",
+ IS_ERR_VALUE((unsigned long)sp->pool_fmr[index]),
+ sp->pool_fmr[index]);
+ ha->stats.fmr_stats.map_fail_cnt++;
+ return -1;
+ }
+ ha->stats.fmr_stats.map_cnt++;
+ return 0;
+}
+
+void vhba_unmap_buf_fmr(struct virtual_hba *vhba, struct srb *sp, int tot_dsds)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int index;
+
+ for (index = 0; index < tot_dsds; index++) {
+ if (sp->pool_fmr[index]) {
+ ib_fmr_pool_unmap(sp->pool_fmr[index]);
+ sp->pool_fmr[index] = 0;
+ }
+ }
+ ha->stats.fmr_stats.unmap_cnt++;
+}
+
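+/*
+ * Send the init block over the data QP: it carries the DMA address and
+ * rkey of the request ring and of the ring read index so the remote side
+ * can access the request ring directly.
+ */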
+int vhba_send_init_blk(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int ret;
+ struct init_block *init_blk;
+
+ vhba_init_rings(vhba);
+
+ if (!ha->request_ring) {
+ eprintk(vhba, "Error - null req ring ptr. Returning 1\n");
+ return 1;
+ }
+
+ init_blk = &ha->init_blk;
+
+ memset(init_blk, 0, sizeof(struct init_block));
+
+ init_blk->type = INIT_BLOCK;
+ init_blk->entry_size = sizeof(struct cmd_type_7);
+ init_blk->ring_size = ha->request_q_length;
+ init_blk->read_index_addr = ha->req_ring_rindex_dma;
+ init_blk->read_index_rkey = vhba->xsmp_info.mr->rkey;
+ init_blk->base_addr = ha->request_dma;
+ init_blk->base_addr_rkey = vhba->xsmp_info.mr->rkey;
+
+ dprintk(TRC_IB, vhba, "base (%Lx), rkey (%0x)\n",
+ init_blk->base_addr, init_blk->base_addr_rkey);
+ dprintk(TRC_IB, vhba, "read (%Lx), rrkey (%0x)\n",
+ init_blk->read_index_addr, init_blk->read_index_rkey);
+ dprintk(TRC_IB, vhba, "ring (%0x), entry (%0x)\n",
+ init_blk->ring_size, init_blk->entry_size);
+
+/* Init block index is 2048 (not overlapping with write_index 0 - 1023) */
+
+ ret = xscore_post_send(&vhba->data_conn.ctx, (u8 *) init_blk,
+ sizeof(struct init_block), XSCORE_DEFER_PROCESS);
+
+ if (ret) {
+ eprintk(vhba, "xscore_post_send() failed\n");
+ ha->stats.ib_stats.dqp_send_err_cnt++;
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ return 1;
+ }
+ return 0;
+}
+
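+/*
+ * Notify the IOP of the current request ring write index. The message
+ * buffer for each ring slot is allocated once and reused.
+ */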
+int vhba_send_write_index(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct vhba_write_index_msg *send_write_index_msg = NULL;
+ int ret;
+
+ if ((ha->data_qp_handle == XSCORE_CONN_ERR) ||
+ (ha->control_qp_handle == XSCORE_CONN_ERR)) {
+ dprintk(TRC_IB_ERRS, vhba, "IB handle is -1\n");
+ return 1;
+ }
+ if ((ha->req_ring_windex < 0) ||
+ (ha->req_ring_windex >= ha->request_q_length)) {
+ eprintk(vhba, "Error - invalid req_ring_windex %d\n"
+ " in vhba_send_write_index\n", ha->req_ring_windex);
+ return 1;
+ }
+
+ if (!ha->send_buf_ptr[ha->req_ring_windex]) {
+ ha->send_buf_ptr[ha->req_ring_windex] =
+ kmalloc(sizeof(struct vhba_write_index_msg), GFP_ATOMIC);
+ if (!ha->send_buf_ptr[ha->req_ring_windex]) {
+ eprintk(vhba, "Error - kmalloc failed!\n");
+ return 1;
+ }
+ }
+
+ send_write_index_msg = ha->send_buf_ptr[ha->req_ring_windex];
+ ha->send_write_index_msg = send_write_index_msg;
+
+ if (!send_write_index_msg) {
+ eprintk(vhba, "Error - null send write index msg ptr.\n"
+ " Returning 1\n");
+ return 1;
+ }
+
+ send_write_index_msg->type = WRITE_INDEX_UPDATE;
+ send_write_index_msg->_reserved1 = 0x0;
+ send_write_index_msg->_reserved = 0x0;
+ send_write_index_msg->write_index = ha->req_ring_windex;
+
+ ret = xscore_post_send(&vhba->data_conn.ctx,
+ (u8 *) send_write_index_msg,
+ sizeof(struct vhba_write_index_msg),
+ XSCORE_DEFER_PROCESS);
+ if (ret) {
+ eprintk(vhba, "Error - xsigo ib send msg failed?\n");
+ send_write_index_msg = 0;
+ ha->stats.ib_stats.dqp_send_err_cnt++;
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ ib_link_down(ha);
+ return 1;
+ }
+
+ return 0;
+}
+
+int vhba_send_heart_beat(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct heart_beat_msg *hb_msg;
+ int ret = 0;
+
+ dprintk(TRC_FUNCS, vhba, "Entering...\n");
+
+ if (atomic_read(&ha->qp_status) != VHBA_QP_CONNECTED)
+ return 1;
+
+ if (atomic_read(&ha->ib_status) != VHBA_IB_UP)
+ return 1;
+
+ dprintk(TRC_IB, vhba, "handle is %d\n", ha->control_qp_handle);
+
+ hb_msg = kmalloc(sizeof(struct heart_beat_msg), GFP_ATOMIC);
+ if (!hb_msg) {
+ dprintk(TRC_IB_ERRS, vhba, "heart beat msg is not valid\n");
+ return 1;
+ }
+
+ hb_msg->type = VHBA_HEART_BEAT;
+ hb_msg->rsvd = 0;
+
+ dprintk(TRC_IB, vhba,
+ "sending hear beat msg on handle %d\n", ha->control_qp_handle);
+
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+ dprintk(TRC_IB, vhba, "cqp hdl %d hb_msg ptr %p\n",
+ ha->control_qp_handle, hb_msg);
+ ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+ (u8 *) hb_msg,
+ sizeof(struct heart_beat_msg),
+ XSCORE_DEFER_PROCESS);
+ }
+
+ if (ret) {
+ ha->stats.ib_stats.cqp_send_err_cnt++;
+ dprintk(TRC_IB_ERRS, vhba, "heart beat msg failed\n");
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ kfree(hb_msg);
+ }
+
+ dprintk(TRC_FUNCS, vhba, "Returning...\n");
+ return 0;
+}
+
+int vhba_send_enable_vhba(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct enable_msg *enable_msg;
+ int ret = 1;
+
+ enable_msg = kmalloc(sizeof(struct enable_msg), GFP_ATOMIC);
+ if (!enable_msg) {
+ dprintk(TRC_IB_ERRS, vhba, "enable_msg malloc error\n");
+ return 1;
+ }
+
+ memset(enable_msg, 0, sizeof(struct enable_msg));
+
+ enable_msg->type = ENABLE_VHBA_Q;
+ enable_msg->rsvd = 0;
+
+ dprintk(TRC_INFO, vhba, "sending enable vhba msg on Control Q Pair\n");
+
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+ ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+ (u8 *) enable_msg,
+ sizeof(struct enable_msg),
+ XSCORE_DEFER_PROCESS);
+ }
+ if (ret) {
+ ha->stats.ib_stats.cqp_send_err_cnt++;
+ eprintk(vhba, "Error - xscore_post_send() failed\n");
+ ib_link_down(ha);
+ kfree(enable_msg);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ }
+ ha->stats.fc_stats.enable_msg_cnt++;
+ return 0;
+}
+
+void vhba_data_send_comp_handler(void *client_arg, void *msg, int status, int n)
+{
+ u32 idr = (u32) (unsigned long)client_arg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(idr);
+ if (!vhba) {
+ eprintk(NULL, "Invalid client_arg received\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ if (status) {
+ eprintk(vhba, "Data Send Completion error: status %d\n",
+ status);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ goto out;
+ }
+out:
+ DEC_REF_CNT(vhba);
+}
+
+void vhba_ctrl_send_comp_handler(void *client_arg, void *msg, int status, int n)
+{
+ u32 idr = (u32) (unsigned long)client_arg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(idr);
+ if (!vhba) {
+ eprintk(NULL, "Invalid client_arg received\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ if (status) {
+ eprintk(vhba, "Ctrl Send Completion error: status %d\n",
+ status);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ goto out;
+ }
+ kfree(msg);
+out:
+ DEC_REF_CNT(vhba);
+}
+
+void vhba_cqp_recv_comp_handler(void *client_arg, void *msg, int sz,
+ int status, int n)
+{
+ u32 idr = (u32) (unsigned long)client_arg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(idr);
+ if (!vhba) {
+ eprintk(NULL, "Invalid client_arg received\n");
+ kfree(msg);
+ return;
+ }
+ ha = vhba->ha;
+
+ if (status) {
+ eprintk(vhba, "CQP Recv Completion error: status %d\n", status);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ goto out;
+ }
+ process_cqp_msg(vhba, msg, sz);
+out:
+ kfree(msg);
+ DEC_REF_CNT(vhba);
+}
+
+/*
+ * Called from interrupt context
+ */
+
+void vhba_recv_comp_handler(void *client_arg, void *msg, int sz,
+ int status, int n)
+{
+ u32 idr = (u32) (unsigned long)client_arg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(idr);
+ if (!vhba) {
+ eprintk(NULL, "Invalid client_arg received\n");
+ return;
+ }
+ ha = vhba->ha;
+
+ if (status) {
+ eprintk(vhba, "Recv Completion error: status %d\n", status);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ kfree(msg);
+ DEC_REF_CNT(vhba);
+ return;
+ }
+ process_dqp_msg(vhba, msg, sz);
+
+ kfree(msg);
+ DEC_REF_CNT(vhba);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_IB_H__
+#define __VHBA_IB_H__
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_fmr_pool.h>
+#include "vhba_os_def.h"
+
+/* Control queue pair defines*/
+#define VHBA_CQP_SEND_Q_SZ 64
+#define VHBA_CQP_RECV_Q_SZ 64
+#define VHBA_CQP_MAX_BUF_SIZE 1024
+#define VHBA_CQP_MAX_CTRL_MSG_SIZE 1024
+
+/* Data queue pair defines */
+#define VHBA_DQP_SEND_Q_SZ 1400
+#define VHBA_DQP_RECV_Q_SZ 1400
+#define VHBA_DQP_MAX_BUF_SIZE 256
+#define VHBA_DQP_MAX_CTRL_MSG_SIZE 256
+
+/* VHBA QP States */
+#define VHBA_QP_NOTCONNECTED 0
+#define VHBA_QP_TRYCONNECTING 1
+#define VHBA_QP_RECONNECTING 2
+#define VHBA_QP_PARTIAL_CONNECT 3
+#define VHBA_QP_CONNECTED 4
+#define VHBA_QP_DISCONNECTED 5
+
+#define VHBA_IB_UP 0
+#define VHBA_IB_DOWN 1
+#define VHBA_IB_DEAD 2
+
+/* Queue pair type */
+#define QP_TYPE_CONTROL 0
+#define QP_TYPE_DATA 1
+
+/* Data queue pair direction */
+#define DATA_QP_TYPE_TX 1
+#define DATA_QP_TYPE_RX 2
+
+/* FMR defines */
+#define VHBA_FMR_POOL_SIZE 256
+#define VHBA_MAX_TRANSFER_SIZE 4080
+#define VHBA_DEFAULT_TRANSFER_SIZE 512
+#define VHBA_MAX_FMR_PAGES (((VHBA_DEFAULT_TRANSFER_SIZE * 512)/ \
+ (PAGE_SIZE)) + 2)
+#define VHBA_FMR_DIRTY_MARK 32
+#define VHBA_MAX_DSDS_IN_FMR ((VHBA_DEFAULT_TRANSFER_SIZE * 512)/ \
+ (PAGE_SIZE))
+
+#define TCA_SERVICE_ID 0x1001ULL
+
+struct scsi_xg_vhba_host;
+struct srb;
+
+int vhba_init_rings(struct virtual_hba *vhba);
+void process_cqp_msg(struct virtual_hba *vhba, u8 *msg, int length);
+void process_dqp_msg(struct virtual_hba *vhba, u8 *msg, int length);
+int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id, int notifycmd);
+
+void vhba_control_callback(void *client_arg, int event);
+void vhba_data_callback(void *client_arg, int event);
+
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba);
+int vhba_ib_connect_qp(struct virtual_hba *vhba);
+int vhba_conn_init(struct virtual_hba *vhba);
+void vhba_unmap_buf_fmr(struct virtual_hba *vhba, struct srb *sp, int tot_dsds);
+void sp_put(struct virtual_hba *vhba, struct srb *sp);
+int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr, int num_pgs,
+ u64 *mapped_fmr_iova, struct srb *sp, int index);
+int vhba_send_write_index(struct virtual_hba *vhba);
+int readjust_io_addr(struct srb *sp);
+int vhba_alloc_fmr_pool(struct virtual_hba *vhba);
+void vhba_dealloc_fmr_pool(struct virtual_hba *vhba);
+extern void vhba_ctrl_send_comp_handler(void *client_arg, void *msg, int status,
+ int n);
+extern void vhba_recv_comp_handler(void *client_arg, void *msg, int sz,
+ int status, int n);
+extern void vhba_cqp_recv_comp_handler(void *client_arg, void *msg, int sz,
+ int status, int n);
+
+#endif /* __VHBA_IB_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * The VHBA driver is an i/f driver for the Xsigo virtual HBA (VHBA)
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <scsi/scsi.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_transport_fc.h>
+#include <rdma/ib_verbs.h>
+
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_align.h"
+#include "vhba_scsi_intf.h"
+
+#include "xs_compat.h"
+
+static u32 vhba_target_bind(struct virtual_hba *vhba, u32 loop_id,
+ u8 *nwwn, u8 *pwwn, u32 port_id, s32 bound_value,
+ u32 lun_count, u8 *lun_map, u16 *lun_id,
+ u8 media_type);
+static u32 vhba_map_unbound_targets(struct virtual_hba *vhba);
+static struct os_tgt *vhba_tgt_alloc(struct virtual_hba *vhba, u32 tgt);
+static void process_status_cont_entry(struct virtual_hba *vhba,
+ struct sts_cont_entry *pkt);
+
+#define VHBA_CMD_TIMEOUT 18
+
+static inline void add_to_disc_ports(struct fc_port *fcport,
+ struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ha->list_lock, flags);
+ list_add_tail(&fcport->list, &ha->disc_ports);
+ spin_unlock_irqrestore(&ha->list_lock, flags);
+}
+
+int vhba_initialize(struct virtual_hba *vhba, struct vhba_xsmp_msg *msg)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int rval = 0;
+
+ ha->flags.online = 0;
+ ha->device_flags = 0;
+
+ rval = vhba_alloc_fmr_pool(vhba);
+ if (rval) {
+ eprintk(vhba, "Trouble allocating FMR pool.\n"
+ " Returning %d\n", rval);
+ return -1;
+ }
+
+ /* Initialize VHBA request, IB queues, etc */
+ rval = vhba_init_rings(vhba);
+ if (rval) {
+ eprintk(vhba, "Trouble initializing rings.\n"
+ " Returning %d\n", rval);
+ vhba_dealloc_fmr_pool(vhba);
+ }
+ return rval;
+}
+
+int vhba_init_rings(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int i;
+
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++)
+ ha->outstanding_cmds[i] = NULL;
+
+ ha->current_outstanding_cmd = 0;
+
+ ha->request_ring_ptr = ha->request_ring;
+ *ha->req_ring_rindex = 0;
+ ha->req_ring_windex = 0;
+ ha->req_q_cnt = ha->request_q_length;
+
+ return 0;
+}
+
+void complete_cmd_and_callback(struct virtual_hba *vhba, struct srb *sp,
+ struct scsi_cmnd *cp)
+{
+ int sg_count;
+ u32 request_bufflen;
+ struct scatterlist *request_buffer;
+
+ /*
+ * Grab the outstanding command
+ * make the callback and pass the status
+ */
+ if (sp && cp) {
+ if (sp->cmd != NULL) {
+ sg_count = scsi_sg_count(sp->cmd);
+ request_buffer = scsi_sglist(sp->cmd);
+ request_bufflen = scsi_bufflen(sp->cmd);
+
+ if (sp->flags & SRB_DMA_VALID) {
+ sp->flags &= ~SRB_DMA_VALID;
+ /* Ummap the memory used for this I/O */
+ if (sg_count) {
+ ib_dma_unmap_sg(vhba->xsmp_info.ib_device,
+ request_buffer, sg_count,
+ sp->cmd->sc_data_direction);
+
+ vhba_unmap_buf_fmr(vhba, sp,
+ sp->tot_dsds);
+
+ } else if (request_bufflen) {
+ ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+ sp->dma_handle, request_bufflen,
+ sp->cmd->sc_data_direction);
+
+ vhba_unmap_buf_fmr(vhba, sp,
+ sp->tot_dsds);
+ if (sp->unaligned_sg)
+ vhba_tear_bounce_buffer(sp);
+ }
+ }
+ } else
+ dprintk(TRC_ERRORS, vhba, "sp cmd null\n");
+ sp_put(vhba, sp);
+ }
+}
+
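+/*
+ * Complete every outstanding command with DID_NO_CONNECT, deleting its
+ * SCSI timer and dropping the per-queue request counts. Called from
+ * vhba_delete() after the QPs have been disconnected.
+ */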
+int vhba_purge_pending_ios(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct srb *sp;
+ struct scsi_cmnd *cp;
+ int i, queue_num;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+ if (ha->outstanding_cmds[i]) {
+ sp = ha->outstanding_cmds[i];
+ cp = sp->cmd;
+ cp->result = DID_NO_CONNECT << 16;
+ /* Delete SCSI timer */
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ sp->timer.data = (unsigned long)NULL;
+ }
+ ha->outstanding_cmds[i] = NULL;
+ CMD_SP(sp->cmd) = NULL;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ spin_lock_irqsave(&ha->io_lock, flags);
+ queue_num = sp->queue_num;
+
+ dprintk(TRC_SCSI, vhba,
+ "dec q cnt for vhba %p q %d\n",
+ vhba, queue_num);
+ if (atomic_read
+ (&ha->stats.io_stats.num_vh_q_reqs[queue_num]) != 0)
+ atomic_dec(&ha->stats.io_stats.
+ num_vh_q_reqs[queue_num]);
+ }
+ }
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ return 0;
+}
+
+void vhba_taskmgmt_flush_ios(struct virtual_hba *vhba, int tgt_id, int lun,
+ int lun_reset_flag)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct srb *sp;
+ struct scsi_cmnd *cp;
+ int i, queue_num;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+ if (ha->outstanding_cmds[i]) {
+ sp = ha->outstanding_cmds[i];
+ cp = sp->cmd;
+ if ((lun_reset_flag && (cp->device->id == tgt_id) &&
+ (cp->device->lun == lun)) ||
+ ((lun_reset_flag == 0) &&
+ (cp->device->id == tgt_id))) {
+
+ cp->result = DID_NO_CONNECT << 16;
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ sp->timer.data = (unsigned long)NULL;
+ }
+ ha->outstanding_cmds[i] = NULL;
+ CMD_SP(sp->cmd) = NULL;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+
+ queue_num = sp->queue_num;
+
+ dprintk(TRC_SCSI, vhba,
+ "dec q cnt for vhba %p q %d\n",
+ vhba, queue_num);
+ if (atomic_read
+ (&ha->stats.io_stats.
+ num_vh_q_reqs[queue_num]) != 0)
+ atomic_dec(&ha->stats.io_stats.
+ num_vh_q_reqs[queue_num]);
+ }
+ }
+ }
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+}
+
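+/*
+ * Parse a status IOCB received on the data QP: look up the outstanding
+ * srb by handle, translate comp_status/scsi_status into a Linux result
+ * code, copy any sense data and complete the command.
+ */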
+void process_status_entry(struct virtual_hba *vhba, struct sts_entry_24xx *sts)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct srb *sp;
+ struct scsi_cmnd *cp;
+ struct os_tgt *tq;
+ unsigned long flags;
+ u8 *rsp_info, *sense_data;
+ u8 *cdb_ptr, *byte_ptr;
+ u8 lscsi_status;
+ u16 comp_status, scsi_status;
+ s32 resid;
+ u32 sense_len, rsp_info_len, resid_len;
+ u32 queue_num;
+ u32 request_bufflen;
+
+ byte_ptr = (u8 *) sts;
+ byte_ptr = byte_ptr + 8;
+ sts = (struct sts_entry_24xx *)byte_ptr;
+ cdb_ptr = byte_ptr;
+
+ sts->handle &= 0x000003ff;
+ comp_status = le16_to_cpu(sts->comp_status);
+ scsi_status = le16_to_cpu(sts->scsi_status) & SS_MASK;
+
+ ha->stats.io_stats.total_io_rsp++;
+
+ dprintk(TRC_IO, vhba, "comp status %x scsi_status %x handle %x\n",
+ (int)le16_to_cpu(sts->comp_status),
+ (int)le16_to_cpu(sts->scsi_status),
+ (int)le16_to_cpu(sts->handle));
+
+ if (sts->handle < MAX_OUTSTANDING_COMMANDS) {
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+ sp = ha->outstanding_cmds[sts->handle];
+
+ if (sp) {
+ queue_num = sp->queue_num;
+
+ atomic_dec(&ha->stats.
+ io_stats.num_vh_q_reqs[queue_num]);
+
+ if (sp->state == VHBA_IO_STATE_ABORTING) {
+ dprintk(TRC_INFO, vhba,
+ "Aborting IO: sp:0x%p, sp->cmd:0x%p\n",
+ sp, sp->cmd);
+
+ dprintk(TRC_ERR_RECOV, vhba,
+ "scsi_status= 0x%x\n",
+ (int)le16_to_cpu(sts->scsi_status));
+
+ sp->state = VHBA_IO_STATE_ABORTED;
+ sp->abort_cnt = 0;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ return;
+ }
+ if (sp->state == VHBA_IO_STATE_ABORT_FAILED) {
+ sp->state = VHBA_IO_STATE_ABORT_NEEDED;
+ sp->abort_cnt = 0;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ return;
+ }
+
+ ha->outstanding_cmds[sts->handle] = NULL;
+ CMD_SP(sp->cmd) = NULL;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ } else {
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ dprintk(TRC_SCSI_ERRS, vhba, "sp is null for hndl %d\n",
+ (int)sts->handle);
+ }
+
+ } else if (sts->handle == MAX_OUTSTANDING_COMMANDS) {
+ /*
+ * This indicates completion of a tsk mgmt command
+ * No corresponding sp to worry about
+ */
+ dprintk(TRC_ERRORS, vhba,
+ "Returning erroneously: hndl is 1024!\n");
+ return;
+ } else
+ sp = NULL;
+
+ if (sp == NULL) {
+ dprintk(TRC_SCSI_ERRS, vhba, "sp is null. sts_handle= %u\n"
+ " curr hndl = %u\n", (u32) sts->handle,
+ (u32) ha->current_outstanding_cmd);
+ /* Reset this adapter or I/O card, etc */
+ return;
+ }
+
+ cp = sp->cmd;
+ if (cp == NULL) {
+ dprintk(TRC_ERRORS, vhba, "cmd already returned to OS\n"
+ " hndl %u sp %p sp->state %d\n",
+ (u32)sts->handle, sp, sp->state);
+ return;
+ }
+ /*
+ * When abort is happening (sp is searched) so can't change
+ * the sp. Quietly store this response somewhere to be
+ * processed once this sp search is over
+ */
+ if (sp->state == 1) {
+ dprintk(TRC_ERRORS, vhba, "Command already aborted\n");
+ return;
+ }
+ request_bufflen = scsi_bufflen(sp->cmd);
+
+ /* Delete SCSI timer */
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ sp->timer.data = (unsigned long)NULL;
+ }
+
+ if (sts->entry_type == COMMAND_TYPE_7) {
+ dprintk(TRC_ERRORS, vhba,
+ "Received type 7 iocb back from QL\n");
+ cp->result = DID_NO_CONNECT << 16;
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ return;
+ }
+
+ /* Decrement actthreads if used */
+
+ lscsi_status = scsi_status & STATUS_MASK;
+
+ CMD_ENTRY_STATUS(cp) = sts->entry_status;
+ CMD_COMPL_STATUS(cp) = comp_status;
+ CMD_SCSI_STATUS(cp) = scsi_status;
+
+ sense_len = rsp_info_len = resid_len = 0;
+
+ sense_len = le32_to_cpu(sts->sense_len);
+ rsp_info_len = le32_to_cpu(sts->rsp_data_len);
+ resid_len = le32_to_cpu(sts->rsp_residual_count);
+ rsp_info = sts->data;
+ sense_data = sts->data;
+ host_to_fcp_swap(sts->data, sizeof(sts->data));
+
+ /* Check for any FCP transport errors. */
+ if (scsi_status & SS_RESPONSE_INFO_LEN_VALID) {
+ sense_data += rsp_info_len;
+ if (rsp_info_len > 3 && rsp_info[3]) {
+ eprintk(vhba,
+ "scsi(%ld:%d:%d:%d) FCP I/O protocol failure ",
+ ha->host_no, cp->device->channel,
+ (int)cp->device->id, (int)cp->device->lun);
+ eprintk(vhba,
+ " (%x/%02x%02x%02x%02x%02x%02x%02x%02x)... ",
+ rsp_info_len, rsp_info[0], rsp_info[1],
+ rsp_info[2], rsp_info[3], rsp_info[4],
+ rsp_info[5], rsp_info[6], rsp_info[7]);
+ eprintk(vhba, "retrying command\n");
+ cp->result = DID_BUS_BUSY << 16;
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ return;
+ }
+ } else {
+ rsp_info_len = 0;
+ }
+
+ /* Based on Host and scsi status generate status code for Linux */
+ switch (comp_status) {
+ case CS_COMPLETE:
+ if (scsi_status == 0) {
+ dprintk(TRC_IO, vhba, "hndl %d: sts ok\n",
+ (int)sts->handle);
+ cp->result = DID_OK << 16;
+ break;
+ }
+
+ if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) {
+ resid = resid_len;
+ scsi_set_resid(cp, resid);
+ CMD_RESID_LEN(cp) = resid;
+ }
+
+ cp->result = DID_OK << 16 | lscsi_status;
+ if (lscsi_status == SS_BUSY_CONDITION)
+ break;
+ if (lscsi_status != SS_CHECK_CONDITION)
+ break;
+
+ /* Copy Sense Data into sense buffer. */
+ memset(cp->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+
+ if (!(scsi_status & SS_SENSE_LEN_VALID))
+ break;
+
+ if (sense_len >= SCSI_SENSE_BUFFERSIZE)
+ sense_len = SCSI_SENSE_BUFFERSIZE;
+
+ sp->request_sense_length = sense_len;
+ sp->request_sense_ptr = cp->sense_buffer;
+
+ if (sp->request_sense_length >
+ (sizeof(sts->data) - rsp_info_len))
+ sense_len = sizeof(sts->data) - rsp_info_len;
+
+ memcpy(cp->sense_buffer, sense_data, sense_len);
+ CMD_ACTUAL_SNSLEN(cp) = sense_len;
+ sp->request_sense_ptr += sense_len;
+ sp->request_sense_length -= sense_len;
+ if (sp->request_sense_length != 0)
+ ha->status_srb = sp;
+
+ dprintk(TRC_SCSI_ERRS, vhba, "Check condition Sense data,\n"
+ "scsi(%ld:%d:%d:%d) scsi_status = %d\n",
+ (long)ha->host_no, (int)cp->device->channel,
+ (int)cp->device->id, (int)cp->device->lun, scsi_status);
+
+ break;
+
+ case CS_DATA_UNDERRUN:
+ dprintk(TRC_SCSI, vhba, "UNDERRUN detected\n");
+
+ resid = resid_len;
+ if (scsi_status & SS_RESIDUAL_UNDER) {
+ scsi_set_resid(cp, resid);
+ CMD_RESID_LEN(cp) = resid;
+ }
+
+ /*
+ * Check to see if SCSI Status is non zero. If so report SCSI
+ * Status.
+ */
+ if (lscsi_status != 0) {
+ cp->result = DID_OK << 16 | lscsi_status;
+ if (lscsi_status == SS_BUSY_CONDITION)
+ break;
+ if (lscsi_status != SS_CHECK_CONDITION)
+ break;
+
+ /* Copy Sense Data into sense buffer */
+ memset(cp->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+
+ if (!(scsi_status & SS_SENSE_LEN_VALID))
+ break;
+
+ if (sense_len >= SCSI_SENSE_BUFFERSIZE)
+ sense_len = SCSI_SENSE_BUFFERSIZE;
+
+ sp->request_sense_length = sense_len;
+ sp->request_sense_ptr = cp->sense_buffer;
+
+ if (sp->request_sense_length >
+ (sizeof(sts->data) - rsp_info_len))
+ sense_len = sizeof(sts->data) - rsp_info_len;
+
+ memcpy(cp->sense_buffer, sense_data, sense_len);
+ CMD_ACTUAL_SNSLEN(cp) = sense_len;
+
+ sp->request_sense_ptr += sense_len;
+ sp->request_sense_length -= sense_len;
+ if (sp->request_sense_length != 0)
+ ha->status_srb = sp;
+
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "Check condition Sense data, ");
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "scsi(%ld:%d:%d:%d) cmd=%p pid=%ld\n",
+ ha->host_no, cp->device->channel,
+ (int)cp->device->id, (int)cp->device->lun, cp,
+ cp->serial_number);
+
+ } else {
+
+ /*
+ * If RISC reports underrun and target does not report
+ * it then we must have a lost frame, so tell upper
+ * layer to retry it by reporting a bus busy.
+ */
+ if (!(scsi_status & SS_RESIDUAL_UNDER)) {
+ eprintk(vhba, "scsi(%ld:%d:%d:%d) Dropped\n",
+ ha->host_no, cp->device->channel,
+ (int)cp->device->id,
+ (int)cp->device->lun);
+ eprintk(vhba,
+ "frame(s) detected (%x of %d bytes)..",
+ resid, (u32)request_bufflen);
+ eprintk(vhba, "retrying command.\n");
+
+ cp->result = DID_BUS_BUSY << 16;
+
+ break;
+ }
+
+ /* Handle mid-layer underflow */
+ if ((unsigned)(request_bufflen - resid) <
+ cp->underflow) {
+ eprintk(vhba, "scsi(%ld:%d:%d:%d):Mid-layer\n",
+ ha->host_no, cp->device->channel,
+ (int)cp->device->id,
+ (int)cp->device->lun);
+ eprintk(vhba,
+ "underflow detected (%x of %d bytes) ",
+ resid, (u32)request_bufflen);
+ eprintk(vhba, "...returning error status.\n");
+ cp->result = DID_ERROR << 16;
+ break;
+ }
+
+ cp->result = DID_OK << 16;
+ }
+ break;
+
+ case CS_DATA_OVERRUN:
+
+ eprintk(vhba, "scsi(%ld:%d:%d): OVERRUN status detected\n",
+ ha->host_no, (int)cp->device->id, (int)cp->device->lun);
+ eprintk(vhba, " 0x%x-0x%x\n", comp_status, scsi_status);
+ dprintk(TRC_SCSI_ERRS, vhba, "CDB: 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+ cp->cmnd[0], cp->cmnd[1], cp->cmnd[2], cp->cmnd[3],
+ cp->cmnd[4]);
+ dprintk(TRC_SCSI_ERRS, vhba, " 0x%x\n", cp->cmnd[5]);
+
+ dprintk(TRC_SCSI_ERRS, vhba, "PID=0x%lx req=0x%x xtra=0x%x --",
+ cp->serial_number, request_bufflen, resid_len);
+ dprintk(TRC_SCSI_ERRS, vhba, "\nreturning DID_ERROR status\n");
+ cp->result = DID_ERROR << 16;
+ break;
+
+ case CS_PORT_LOGGED_OUT:
+ case CS_PORT_CONFIG_CHG:
+ case CS_PORT_BUSY:
+ case CS_INCOMPLETE:
+ case CS_PORT_UNAVAILABLE:
+ /*
+ * If the port is in Target Down state, return all IOs for this
+ * Target with DID_NO_CONNECT ELSE Queue the IOs in the
+ * retry_queue.
+ */
+ tq = TGT_Q(ha, cp->device->id);
+ if (tq) {
+ dprintk(TRC_INFO, vhba,
+ "Port Down: Logged Out/Unavailable: port_id:0x%x, PWWN:%lx comp_status=0x%x\n",
+ tq->fcport->d_id.b24, (unsigned long)
+ wwn_to_u64(tq->fcport->port_name), comp_status);
+ }
+ cp->result = DID_BUS_BUSY << 16;
+ break;
+
+ case CS_RESET:
+ dprintk(TRC_INFO, vhba,
+ "CS_RESET:cp=%p, scsi_status=0x%x\n", cp, scsi_status);
+
+ cp->result = DID_RESET << 16;
+ break;
+
+ case CS_ABORTED:
+ /*
+ * hv2.19.12 - DID_ABORT does not retry the request if we
+ * aborted this request then abort otherwise it must be a
+ * reset.
+ */
+ dprintk(TRC_INFO, vhba,
+ "CS_ABORTED, cp=%p, scsi_status=0x%x\n", cp,
+ scsi_status);
+
+ cp->result = DID_RESET << 16;
+ break;
+
+ case CS_TIMEOUT:
+ cp->result = DID_BUS_BUSY << 16;
+
+ vhba->cs_timeout_count++;
+ dprintk(TRC_INFO, vhba,
+ "CS_TIMEOUT for cmd=%p, opcode/len/status 0x%x/0x%x/0x%x\n",
+ cp, cp->cmnd[0], scsi_bufflen(cp), scsi_status);
+ break;
+
+ case CS_QUEUE_FULL:
+ dprintk(TRC_INFO, vhba, "scsi(%ld): QUEUE FULL status\n",
+ ha->host_no);
+ dprintk(TRC_INFO, vhba, " detected 0x%x-0x%x\n", comp_status,
+ scsi_status);
+
+ /* SCSI Mid-Layer handles device queue full */
+ cp->result = DID_OK << 16 | lscsi_status;
+ break;
+
+ case CS_DMA:
+ dprintk(TRC_INFO, vhba, "dma error\n");
+ cp->result = DID_NO_CONNECT << 16;
+ break;
+
+ default:
+ eprintk(vhba, "SCSI error with unknown status\n");
+ eprintk(vhba, " 0x%x-0x%x\n", comp_status, scsi_status);
+
+ cp->result = DID_ERROR << 16;
+ break;
+ }
+
+ /* If no continuation stat */
+ if (ha->status_srb == NULL) {
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ } else {
+ struct sts_cont_entry *ptr;
+
+ if (sts->entry_count > 1) {
+ dprintk(TRC_SCSI_ERRS, vhba, "non null sts srb!\n");
+ ptr = (struct sts_cont_entry *)(byte_ptr +
+ sizeof(struct
+ sts_entry_24xx));
+ process_status_cont_entry(vhba, ptr);
+ } else {
+ sp->request_sense_length = 0;
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ }
+ }
+}
+
+static
+void process_status_cont_entry(struct virtual_hba *vhba,
+ struct sts_cont_entry *pkt)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct srb *sp = ha->status_srb;
+ struct scsi_cmnd *cp;
+ u8 sense_sz;
+
+ if (sp != NULL) {
+ cp = sp->cmd;
+ if (cp == NULL) {
+ eprintk(vhba, "Cmd already returned back\n");
+ eprintk(vhba, " to OS sp %p sp->state %d\n",
+ sp, sp->state);
+ ha->status_srb = NULL;
+ return;
+ }
+
+ if (sp->request_sense_length != 0) {
+ if (sp->request_sense_length > sizeof(pkt->data))
+ sense_sz = sizeof(pkt->data);
+ else
+ sense_sz = sp->request_sense_length;
+
+ host_to_fcp_swap(pkt->data, sizeof(pkt->data));
+
+ dprintk(TRC_IO, vhba, "memcpy of %d bytes\n", sense_sz);
+ memcpy(sp->request_sense_ptr, pkt->data, sense_sz);
+
+ ha->status_srb = NULL;
+ }
+ complete_cmd_and_callback(vhba, sp, cp);
+ DEC_REF_CNT(vhba);
+ }
+}
+
+void process_dqp_msg(struct virtual_hba *vhba, u8 *msg, int length)
+{
+ int type;
+ struct abort_entry_24xx *abt;
+
+ type = *(u8 *) (msg + 8);
+
+ if ((type == STATUS_TYPE) || (type == COMMAND_TYPE_7))
+ process_status_entry(vhba, (struct sts_entry_24xx *)msg);
+ else if (type == STATUS_CONT_TYPE)
+ process_status_cont_entry(vhba, (struct sts_cont_entry *)msg);
+ else if (type == ABORT_IOCB_TYPE) {
+ abt = (struct abort_entry_24xx *)msg;
+ if (abt->nport_handle) {
+ eprintk(vhba, "Could not Abort the command indexed\n");
+ eprintk(vhba, " by handle %d\n", abt->handle);
+ }
+ } else
+ eprintk(vhba, "Unknown message from VH\n");
+}
+
+int vhba_set_tgt_offline(struct virtual_hba *vhba, struct os_tgt *tq)
+{
+ int tgt = tq->fcport->os_target_id;
+
+ dprintk(TRC_TIMER, vhba, "RSCN: setting tgt %d offline\n", tgt);
+ atomic_set(&tq->fcport->state, FCS_DEVICE_LOST);
+
+ return 0;
+}
+
+int vhba_set_all_tgts_offline(struct virtual_hba *vhba)
+{
+ int tgt;
+ struct os_tgt *tq;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (!tq)
+ continue;
+ vhba_set_tgt_offline(vhba, tq);
+ }
+ return 0;
+}
+
+int vhba_set_tgt_online(struct virtual_hba *vhba, struct os_tgt *tq)
+{
+ atomic_set(&tq->fcport->state, FCS_ONLINE);
+ set_bit(TQF_ONLINE, &tq->flags);
+ return 0;
+}
+
+static inline struct fc_rport *xg_rport_add(struct fc_port *fcport,
+ struct scsi_xg_vhba_host *ha)
+{
+ struct fc_rport_identifiers rport_ids;
+ struct fc_rport *rport;
+
+ rport_ids.node_name = wwn_to_u64(fcport->node_name);
+ rport_ids.port_name = wwn_to_u64(fcport->port_name);
+ rport_ids.port_id = fcport->d_id.b.domain << 16 |
+ fcport->d_id.b.area << 8 | fcport->d_id.b.al_pa;
+ rport_ids.roles = FC_PORT_ROLE_FCP_TARGET; /* Hardcode the role */
+ fcport->rport = rport = fc_remote_port_add(ha->host, 0, &rport_ids);
+ if (!rport) {
+ pr_err("FC remote port add failed\n");
+ return NULL;
+ }
+ pr_info("scsi(%ld:%d)\n", ha->host_no, fcport->os_target_id);
+ pr_info(" rport_add: PWWN:%lx NWWN:%lx PORT_ID:%x\n",
+ (unsigned long)rport_ids.port_name,
+ (unsigned long)rport_ids.node_name, rport_ids.port_id);
+ rport->supported_classes = fcport->supported_classes;
+ *((struct fc_port **) rport->dd_data) = fcport;
+ fc_remote_port_rolechg(rport, rport_ids.roles);
+ return rport;
+}
+
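+/*
+ * Work handler that walks the target queue and keeps the FC transport
+ * rport list in sync: online targets without an rport are registered,
+ * dead targets have their rport removed.
+ */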
+void vhba_update_rports(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+ int tgt;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(xwork->idr);
+ if (vhba == NULL) {
+ dprintk(TRC_INFO, NULL,
+ "Could not find vhba for updating rport\n");
+ goto out;
+ }
+ ha = vhba->ha;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ struct os_tgt *tq;
+
+ tq = TGT_Q(ha, tgt);
+ if (tq && tq->fcport) {
+ eprintk(vhba, "rport = %p, state = %d\n",
+ tq->fcport->rport,
+ atomic_read(&tq->fcport->state));
+
+ if (atomic_read(&tq->fcport->state) == FCS_ONLINE) {
+ /* Check if you've already reported the rport */
+ if (tq->fcport->rport) {
+ continue;
+ } else {
+ eprintk(vhba, "Updating rports\n");
+ tq->fcport->rport =
+ xg_rport_add(tq->fcport, ha);
+ if (!tq->fcport->rport)
+ eprintk(ha->vhba,
+ "Error registering scsi(%ld:%d)\n",
+ ha->host_no,
+ tq->fcport->os_target_id);
+ }
+ } else {
+ struct fc_rport *remote_port;
+
+ if ((tq->fcport->rport) &&
+ (atomic_read(&tq->fcport->state)
+ == FCS_DEVICE_DEAD)) {
+ /* Target dead remove rport from OS */
+ eprintk(ha->vhba,
+ "removing scsi(%ld:%d) state: 0x%x\n",
+ ha->host_no,
+ tq->fcport->os_target_id,
+ atomic_read(&tq->fcport->state));
+ remote_port = tq->fcport->rport;
+ tq->fcport->rport = NULL;
+ fc_remote_port_delete(remote_port);
+ }
+ }
+ }
+ }
+ DEC_REF_CNT(vhba);
+ vhba->scan_reqd = 1;
+out:
+ kfree(xwork);
+}
+
+void schedule_update_rports(struct scsi_xg_vhba_host *ha)
+{
+ struct xsvhba_work *xwork =
+ kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+
+ if (!xwork) {
+ eprintk(NULL, "Error allocating work\n");
+ return;
+ }
+ xwork->idr = ha->vhba->idr;
+ INIT_WORK(&xwork->work, vhba_update_rports);
+ queue_work(vhba_workqueuep, &xwork->work);
+}
+
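+/*
+ * Work handler for the scan state: on the second and later scans, bring
+ * any offline scsi_devices of reachable targets back to SDEV_RUNNING,
+ * move the vHBA to VHBA_STATE_ACTIVE and schedule an rport update.
+ */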
+void vhba_handle_scan(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr(xwork->idr);
+ if (vhba == NULL) {
+ dprintk(TRC_INFO, NULL, "Could not find vhba for scan\n");
+ goto out;
+ }
+ ha = vhba->ha;
+
+ if (atomic_read(&vhba->vhba_state) == VHBA_STATE_SCAN) {
+ if (vhba->scanned_once == 0) {
+ vhba->scanned_once = 1;
+
+ } else {
+ dprintk(TRC_INFO, vhba, "(target_count = %d)\n",
+ ha->target_count);
+ dprintk(TRC_INFO, vhba, " max_targets = %d)\n",
+ ha->max_targets);
+ if ((ha->target_count > 0) || (ha->max_targets > 0)) {
+ u32 t_id;
+ struct os_tgt *tq;
+ struct scsi_device *device;
+
+ dprintk(TRC_INFO, vhba,
+ "changing to VHBA_STATE_ACTIVE since we have targets\n");
+
+ for (t_id = 0; t_id < ha->max_targets; t_id++) {
+ tq = TGT_Q(ha, t_id);
+ if (!tq)
+ continue;
+ if (atomic_read(&ha->link_state) !=
+ LINK_DOWN &&
+ atomic_read(&tq->fcport->state) !=
+ FCS_DEVICE_LOST) {
+ device =
+ scsi_device_lookup(ha->host,
+ 0, t_id,
+ 0);
+
+ if (device == NULL)
+ continue;
+ if (device->sdev_state ==
+ SDEV_OFFLINE) {
+ device->sdev_state =
+ SDEV_RUNNING;
+ }
+ scsi_device_put(device);
+ }
+ }
+ }
+ }
+ atomic_set(&vhba->vhba_state, VHBA_STATE_ACTIVE);
+ }
+
+ /*
+ * On the first install, register the rports here; TGT update
+ * messages do not arrive the first time around.
+ */
+ schedule_update_rports(ha);
+
+ DEC_REF_CNT(vhba);
+out:
+ kfree(xwork);
+}
+
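+/*
+ * Handle an RSCN target-online update: match the reported PWWN against
+ * the existing target queue, refresh loop id, port id and the LUN list,
+ * and flag a rescan when the LUN inventory has changed.
+ */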
+void vhba_handle_targets(struct virtual_hba *vhba,
+ struct vhba_tgt_status_msg tgt_status_msg, int *found)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ int loop_id = (u32) be16_to_cpu(tgt_status_msg.loop_id);
+ int tgt, k, lun_count;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (tq && (memcmp(tgt_status_msg.wwpn, tq->fcport->port_name,
+ WWN_SIZE) == 0)) {
+ *found = 1;
+ if (atomic_read(&tq->fcport->state) != FCS_ONLINE) {
+ ha->stats.fc_stats.rscn_up_cnt++;
+ atomic_set(&tq->fcport->state, FCS_ONLINE);
+ set_bit(TQF_ONLINE, &tq->flags);
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Target online msg received: PWWN: %llx, port_id: 0x%x, loop_id: 0x%x\n",
+ wwn_to_u64(tgt_status_msg.wwpn),
+ be32_to_cpu(tgt_status_msg.port_id),
+ loop_id);
+ dprintk(TRC_INFO, vhba,
+ "RSCN: old PWWN: %llx, old port_id: 0x%x, old loop_id: 0x%x\n",
+ wwn_to_u64(tq->fcport->port_name),
+ tq->fcport->d_id.b24,
+ tq->fcport->loop_id);
+ tq->fcport->loop_id = (u32)
+ be16_to_cpu(tgt_status_msg.loop_id);
+ tq->fcport->d_id.b24 = tq->d_id.b24 =
+ be32_to_cpu(tgt_status_msg.port_id);
+ for (k = 0; k < WWN_SIZE; k++)
+ tq->port_name[k] =
+ tgt_status_msg.wwpn[k];
+
+ lun_count = (u32)
+ be16_to_cpu(tgt_status_msg.lun_count);
+ if (lun_count != tq->fcport->lun_count) {
+ dprintk(TRC_INFO, vhba,
+ "RSCN Target online: lun count is different\n");
+ vhba->scan_reqd = 1;
+ } else {
+ for (k = 0; k < lun_count; k++) {
+ if (tq->fcport->lun_ids[k] !=
+ tgt_status_msg.lun_ids[k]) {
+ dprintk(TRC_INFO,
+ vhba,
+ "RSCN Target ");
+ dprintk(TRC_INFO,
+ vhba,
+ "online:lun id ");
+ dprintk(TRC_INFO,
+ vhba,
+ "different\n");
+ vhba->scan_reqd = 1;
+ break;
+ }
+ }
+ }
+ for (k = 0; k < MAX_FIBRE_LUNS; k++)
+ tq->fcport->lun_ids[k] = -1;
+ for (k = 0; k < lun_count; k++)
+ tq->fcport->lun_ids[k] =
+ tgt_status_msg.lun_ids[k];
+
+ dprintk(TRC_INFO, NULL,
+ "New Lun_count= %d\n", lun_count);
+ tq->fcport->lun_count = lun_count;
+ memcpy(tq->fcport->port_name, tq->port_name,
+ WWN_SIZE);
+ vhba_set_tgt_online(vhba, tq);
+ } else {
+ /*
+ * Already in up state no need to process...
+ */
+ dprintk(TRC_INFO, vhba,
+ "RSCN:Target online");
+ dprintk(TRC_INFO, vhba,
+ " msg received for already enabled");
+ dprintk(TRC_INFO, vhba,
+ " device PWWN: %llx, ",
+ wwn_to_u64(tgt_status_msg.wwpn));
+ dprintk(TRC_INFO, vhba,
+ "port_id: 0x%x,loop_id: 0x%x\n",
+ be32_to_cpu(tgt_status_msg.port_id),
+ loop_id);
+ dprintk(TRC_INFO, vhba,
+ "RSCN: old PWWN: %llx, old port_id: ",
+ wwn_to_u64(tq->fcport->port_name));
+ dprintk(TRC_INFO, vhba,
+ "0x%x, old loop_id: 0x%x\n",
+ tq->fcport->d_id.b24,
+ tq->fcport->loop_id);
+
+ ha->stats.fc_stats.rscn_multiple_up_cnt++;
+ tq->fcport->loop_id = (u32)
+ be16_to_cpu(tgt_status_msg.loop_id);
+ tq->fcport->d_id.b24 = tq->d_id.b24 =
+ be32_to_cpu(tgt_status_msg.port_id);
+ for (k = 0; k < WWN_SIZE; k++)
+ tq->port_name[k] =
+ tgt_status_msg.wwpn[k];
+ lun_count = (u32)
+ be16_to_cpu(tgt_status_msg.lun_count);
+
+ if (lun_count != tq->fcport->lun_count) {
+ dprintk(TRC_INFO, vhba,
+ "RSCN Target already online: lun");
+ dprintk(TRC_INFO, vhba,
+ " count is different\n");
+ vhba->scan_reqd = 1;
+ } else {
+ for (k = 0; k < lun_count; k++) {
+ if (tq->fcport->lun_ids[k] !=
+ tgt_status_msg.lun_ids[k]) {
+ dprintk(TRC_INFO,
+ vhba, "RSCN ");
+ dprintk(TRC_INFO,
+ vhba, "Target already");
+ dprintk(TRC_INFO,
+ vhba, " online: lun");
+ dprintk(TRC_INFO,
+ vhba, " count is ");
+ dprintk(TRC_INFO,
+ vhba, "different\n");
+ vhba->scan_reqd = 1;
+ break;
+ }
+ }
+ }
+ for (k = 0; k < MAX_FIBRE_LUNS; k++)
+ tq->fcport->lun_ids[k] = -1;
+ for (k = 0; k < lun_count; k++) {
+ tq->fcport->lun_ids[k] =
+ tgt_status_msg.lun_ids[k];
+ dprintk(TRC_INFO, NULL,
+ "Lun id = %d\n",
+ tq->fcport->lun_ids[k]);
+ }
+ dprintk(TRC_INFO, NULL,
+ "New Lun_count= " "%d\n", lun_count);
+ tq->fcport->lun_count = lun_count;
+ memcpy(tq->fcport->port_name, tq->port_name,
+ WWN_SIZE);
+ }
+ }
+ ha->stats.fc_stats.last_up_tgt = tgt;
+ }
+}
+
+void process_cqp_msg(struct virtual_hba *vhba, u8 *msg, int length)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct vhba_discovery_msg *r_msg;
+ struct vhba_discovery_cont_msg *r_cont_msg;
+ struct vhba_tgt_status_msg tgt_status_msg;
+ struct enable_rsp *enable_rsp;
+ struct os_tgt *tq;
+ struct tgt_info *tgt_msg;
+ struct vhba_link_status *link_status_msg = NULL;
+ struct xsvhba_work *xwork;
+ int type, vp;
+ int i, k, found;
+ int work_submitted = 0;
+
+ u8 port_name[WWN_SIZE];
+ u8 node_name[WWN_SIZE];
+ u32 lun_count;
+ u32 tgt;
+ u32 port_id;
+ u32 loop_id;
+ u32 t_count;
+ s32 bound_value;
+ u8 lun_map[MAX_FIBRE_LUNS >> 3];
+ u16 lun_id[MAX_FIBRE_LUNS];
+ u8 media_type;
+
+ xwork = kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+ if (!xwork) {
+ eprintk(NULL, "vhba_work kmalloc failed\n");
+ return;
+ }
+
+ xwork->idr = vhba->idr;
+
+ type = *(u8 *) msg;
+
+ if (type == DISC_INFO_UPDATE) {
+ r_msg = (struct vhba_discovery_msg *)msg;
+ dprintk(TRC_INFO, vhba,
+ "Got disc info from IOP." " length %d\n", length);
+
+ ha->stats.fc_stats.disc_info_cnt++;
+
+ if (be16_to_cpu(r_msg->target_count) == 0) {
+ dprintk(TRC_INFO, vhba, "zero tgts discovered!\n");
+ ha->target_count = 0;
+ ha->max_targets = ha->target_count;
+ ha->max_cont_segs = be16_to_cpu(r_msg->cont_count);
+ dprintk(TRC_CQP, vhba,
+ "Number of continuation segments = %d\n",
+ ha->max_cont_segs);
+ kfree(xwork);
+ return;
+ }
+
+ ha->target_count = be16_to_cpu(r_msg->target_count);
+ t_count = (u32) ha->target_count;
+ dprintk(TRC_INFO, vhba, "Target Count %d\n", t_count);
+
+ ha->max_targets = ha->target_count;
+ ha->max_tgt_id = ha->max_targets;
+
+ ha->max_cont_segs = be16_to_cpu(r_msg->cont_count);
+ k = (int)ha->max_cont_segs;
+ dprintk(TRC_CQP, vhba, "Cont segs %d\n", k);
+
+ tgt_msg = (struct tgt_info *)(r_msg->tgt_data);
+
+ for (i = 0; i < ha->target_count; i++) {
+ /*
+ * use fcport from the message
+ * also get the fclun info
+ * check for return values...
+ */
+ for (k = 0; k < WWN_SIZE; k++)
+ port_name[k] = tgt_msg[i].wwpn[k];
+
+ for (k = 0; k < WWN_SIZE; k++)
+ node_name[k] = tgt_msg[i].wwnn[k];
+
+ port_id = be32_to_cpu(tgt_msg[i].port_id);
+ loop_id = (u32) (be16_to_cpu(tgt_msg[i].loop_id));
+ bound_value =
+ be32_to_cpu(tgt_msg[i].persistent_binding);
+ if ((bound_value != -1) &&
+ (bound_value >= MAX_FIBRE_TARGETS)) {
+ bound_value = -1;
+ }
+ lun_count = (u32) (be16_to_cpu(tgt_msg[i].lun_count));
+
+ dprintk(TRC_INFO, vhba,
+ "PWWN: %llx, NWWN: %llx, ",
+ wwn_to_u64(port_name),
+ wwn_to_u64(node_name));
+ dprintk(TRC_INFO, vhba,
+ "port_id(%x) loop_id(%x)",
+ (int) port_id, (int)loop_id);
+ dprintk(TRC_INFO, vhba,
+ " bound_value(%d) lun_count(%d)\n",
+ (int)bound_value, (int)lun_count);
+
+ for (k = 0; k < lun_count; k++) {
+ lun_id[k] = tgt_msg[i].lun_ids[k];
+ dprintk(TRC_INFO, vhba,
+ "lun id = %d\n", lun_id[k]);
+ }
+
+ media_type = tgt_msg[i].media_type;
+
+ vhba_target_bind(vhba, loop_id, node_name, port_name,
+ port_id, bound_value, lun_count,
+ lun_map, lun_id, media_type);
+ }
+
+ vhba_set_tgt_count(vhba);
+
+ if (ha->max_cont_segs == 0) {
+
+ /* Map all unbound fcports to the tgt map */
+ vhba_map_unbound_targets(vhba);
+
+ /* Set the loop status to LINK_UP if already not up */
+ if (atomic_read(&ha->link_state) != LINK_UP)
+ atomic_set(&ha->link_state, LINK_UP);
+
+ /*
+ * Let the workqueue handle the scsi scan
+ */
+
+ atomic_set(&vhba->vhba_state, VHBA_STATE_SCAN);
+ ha->discs_ready_flag = 1;
+ INIT_WORK(&xwork->work, vhba_handle_scan);
+ queue_work(vhba_workqueuep, &xwork->work);
+ work_submitted = 1;
+
+ }
+ vhba->scan_reqd = 1;
+
+ } else if (type == DISC_INFO_CONT_UPDATE) {
+ r_cont_msg = (struct vhba_discovery_cont_msg *)msg;
+ dprintk(TRC_INFO, vhba, "Got cont disc info from IOP\n");
+
+ if ((ha->max_cont_segs == 0) &&
+ (ha->max_cont_segs < r_cont_msg->seg_num)) {
+ dprintk(TRC_CQP, vhba,
+ "Max cont segs in the DISC_INFO msg is 0\n");
+ kfree(xwork);
+ return;
+ }
+
+ t_count = (u32) be16_to_cpu(r_cont_msg->target_count);
+ dprintk(TRC_INFO, vhba, "Cont Target Count %d\n", t_count);
+ k = (int)be16_to_cpu(r_cont_msg->seg_num);
+
+ if ((ha->target_count + t_count) <= MAX_FIBRE_TARGETS) {
+ ha->target_count += t_count;
+
+ tgt_msg = (struct tgt_info *)(r_cont_msg->tgt_data);
+ for (i = 0; i < t_count; i++) {
+ /*
+ * use fcport from the message
+ * also get the fclun info
+ * check for return values...
+ */
+ for (k = 0; k < WWN_SIZE; k++)
+ port_name[k] = tgt_msg[i].wwpn[k];
+
+ for (k = 0; k < WWN_SIZE; k++)
+ node_name[k] = tgt_msg[i].wwnn[k];
+
+ port_id = be32_to_cpu(tgt_msg[i].port_id);
+ loop_id = be16_to_cpu(tgt_msg[i].loop_id);
+ bound_value =
+ be32_to_cpu(tgt_msg[i].persistent_binding);
+ lun_count = be16_to_cpu(tgt_msg[i].lun_count);
+
+ dprintk(TRC_INFO, vhba,
+ "PWWN: %llx, NWWN: %llx, ",
+ wwn_to_u64(port_name),
+ wwn_to_u64(node_name));
+ dprintk(TRC_INFO, vhba,
+ "port_id(%x) loop_id(%x)",
+ (int)port_id, (int)loop_id);
+ dprintk(TRC_INFO, vhba,
+ " bound_value(%d) lun_count(%d)\n",
+ (int)bound_value, (int)lun_count);
+
+ for (k = 0; k < lun_count; k++) {
+ lun_id[k] = tgt_msg[i].lun_ids[k];
+ dprintk(TRC_INFO, vhba,
+ "lun id = %d\n", lun_id[k]);
+ }
+
+ media_type = tgt_msg[i].media_type;
+
+ vhba_target_bind(vhba, loop_id, node_name,
+ port_name, port_id,
+ bound_value, lun_count,
+ lun_map, lun_id, media_type);
+ }
+ }
+
+ dprintk(TRC_CQP, vhba, "max disc msgs cnt is %d\n",
+ ha->max_cont_segs);
+ dprintk(TRC_CQP, vhba,
+ "disc cont update seg num is %d %d\n",
+ be16_to_cpu(r_cont_msg->seg_num), r_cont_msg->seg_num);
+
+ /* If last segment processed then start scanning */
+ if (ha->max_cont_segs == r_cont_msg->seg_num) {
+ vhba_map_unbound_targets(vhba);
+
+ ha->max_targets = ha->target_count;
+
+ /* Set the loop status to LINK_UP if already not up */
+ if (atomic_read(&ha->link_state) != LINK_UP)
+ atomic_set(&ha->link_state, LINK_UP);
+
+ dprintk(TRC_INFO, vhba,
+ "max_tgt_id= %d :", ha->max_tgt_id);
+ dprintk(TRC_INFO, vhba, " max_targets= %d\n",
+ ha->max_targets);
+
+ /*
+ * Let the workqueue handle the scsi scan
+ */
+
+ atomic_set(&vhba->vhba_state, VHBA_STATE_SCAN);
+ ha->discs_ready_flag = 1;
+ INIT_WORK(&xwork->work, vhba_handle_scan);
+ queue_work(vhba_workqueuep, &xwork->work);
+ work_submitted = 1;
+ }
+ vhba->scan_reqd = 1;
+ } else if (type == TGT_STATUS_UPDATE) {
+
+ memcpy(&tgt_status_msg, (struct vhba_tgt_status_msg *)msg,
+ sizeof(struct vhba_tgt_status_msg));
+ dprintk(TRC_INFO, vhba, "Got tgt status update from IOP\n");
+
+ vhba->scan_reqd = 1;
+
+ if (tgt_status_msg.flag == TGT_DEAD) {
+ loop_id =
+ (uint32_t) be16_to_cpu(tgt_status_msg.loop_id);
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ port_id = be32_to_cpu(tgt_status_msg.port_id);
+ if (tq && (memcmp(tgt_status_msg.wwpn,
+ tq->fcport->port_name,
+ WWN_SIZE) == 0)
+ && tq->d_id.b24 == port_id) {
+ atomic_set(&tq->fcport->state,
+ FCS_DEVICE_DEAD);
+ ha->stats.fc_stats.rscn_dead_cnt++;
+ ha->stats.fc_stats.last_dead_tgt = tgt;
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Target dead msg ");
+ dprintk(TRC_INFO, vhba,
+ "received: PWWN: %llx,",
+ wwn_to_u64(tgt_status_msg.wwpn));
+ dprintk(TRC_INFO, vhba,
+ "port_id: 0x%x,loop_id: 0x%x\n",
+ be32_to_cpu
+ (tgt_status_msg.port_id),
+ loop_id);
+ }
+ }
+ vhba->scan_reqd = 1;
+ } else if (tgt_status_msg.flag == TGT_LOST) {
+ found = 0;
+ loop_id = (u32) be16_to_cpu(tgt_status_msg.loop_id);
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (tq && (memcmp(tgt_status_msg.wwpn,
+ tq->fcport->port_name,
+ WWN_SIZE) == 0)) {
+ found = 1;
+ if (atomic_read(&tq->fcport->state) !=
+ FCS_DEVICE_LOST) {
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Target Offline ");
+ dprintk(TRC_INFO, vhba,
+ "msg received: PWWN:%llx,",
+ wwn_to_u64
+ (tgt_status_msg.wwpn));
+ dprintk(TRC_INFO, vhba,
+ "port_id: 0x%x, ",
+ be32_to_cpu
+ (tgt_status_msg.port_id));
+ dprintk(TRC_INFO, vhba,
+ "loop_id: 0x%x\n",
+ loop_id);
+ ha->stats.
+ fc_stats.rscn_dn_cnt++;
+ vhba_set_tgt_offline(vhba, tq);
+ } else {
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Target Offline ");
+ dprintk(TRC_INFO, vhba,
+ "msg received for already ");
+ dprintk(TRC_INFO, vhba,
+ "disabled device: PWWN:%llx,",
+ wwn_to_u64
+ (tgt_status_msg.wwpn));
+ dprintk(TRC_INFO, vhba,
+ "port_id: 0x%x, ",
+ be32_to_cpu
+ (tgt_status_msg.port_id));
+ dprintk(TRC_INFO, vhba,
+ "loop_id: 0x%x\n",
+ loop_id);
+ ha->stats.fc_stats.
+ rscn_multiple_dn_cnt++;
+ }
+ ha->stats.fc_stats.last_dn_tgt = tgt;
+ }
+ }
+ if (!found) {
+ eprintk(vhba,
+ "RSCN: No target ");
+ eprintk(vhba, "found for offline msg: ");
+ eprintk(vhba, "port_id: 0x%x, loop_id: 0x%x\n",
+ be32_to_cpu(tgt_status_msg.port_id),
+ loop_id);
+ }
+ } else if (tgt_status_msg.flag == TGT_FOUND) {
+
+ if (atomic_read(&ha->link_state) != LINK_UP) {
+ ha->stats.fc_stats.link_up_cnt++;
+ atomic_set(&ha->link_state, LINK_UP);
+ }
+ found = 0;
+ vhba_handle_targets(vhba, tgt_status_msg, &found);
+ if (!found) {
+ /* Brand new target discovered. process it */
+ loop_id =
+ (u32) be16_to_cpu(tgt_status_msg.loop_id);
+ port_id = be32_to_cpu(tgt_status_msg.port_id);
+ if (tgt_status_msg.persistent_binding != -1) {
+ bound_value =
+ be32_to_cpu
+ (tgt_status_msg.persistent_binding);
+ ha->stats.fc_stats.last_up_tgt =
+ bound_value;
+ } else {
+ bound_value = -1;
+ }
+
+ if (bound_value > MAX_TARGETS) {
+ eprintk(vhba,
+ "bound value exceeds limits\n");
+ bound_value = -1;
+ }
+
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Target online msg received fr");
+ dprintk(TRC_INFO, vhba,
+ " new device: PWWN:%llx, ",
+ wwn_to_u64(tgt_status_msg.wwpn));
+ dprintk(TRC_INFO, vhba,
+ "port_id: 0x%x, loop_id: ", (u32)
+ be32_to_cpu(tgt_status_msg.port_id));
+ dprintk(TRC_INFO, vhba,
+ "0x%x binding: %d\n",
+ loop_id, (int)bound_value);
+ dprintk(TRC_INFO, vhba,
+ "RSCN: Curr tgt_cnt: 0x%x max_tgt_id ",
+ ha->target_count);
+ dprintk(TRC_INFO, vhba,
+ "0x%x, max_tgts 0x%x\n",
+ ha->max_tgt_id, ha->max_targets);
+ for (k = 0; k < WWN_SIZE; k++)
+ port_name[k] = tgt_status_msg.wwpn[k];
+ for (k = 0; k < WWN_SIZE; k++)
+ node_name[k] = tgt_status_msg.wwnn[k];
+ lun_count =
+ (u32) (be16_to_cpu
+ (tgt_status_msg.lun_count));
+ for (k = 0; k < lun_count; k++)
+ lun_id[k] = tgt_status_msg.lun_ids[k];
+
+ media_type = tgt_status_msg.media_type;
+
+ vhba_target_bind(vhba, loop_id, node_name,
+ port_name, port_id,
+ bound_value, lun_count,
+ lun_map, lun_id, media_type);
+ vhba_map_unbound_targets(vhba);
+ if (bound_value == -1)
+ ha->stats.fc_stats.last_up_tgt =
+ ha->max_tgt_id;
+ if (vhba->scanned_once == 0) {
+ /*
+ * Let the workqueue handle the
+ * scsi scan
+ */
+ atomic_set(&vhba->vhba_state,
+ VHBA_STATE_SCAN);
+ INIT_WORK(&xwork->work,
+ vhba_handle_scan);
+ queue_work(vhba_workqueuep,
+ &xwork->work);
+ work_submitted = 1;
+ } else {
+ /*for new device */
+ vhba->scan_reqd = 1;
+ }
+ } else {
+ vhba_set_tgt_count(vhba);
+ atomic_set(&vhba->vhba_state,
+ VHBA_STATE_ACTIVE);
+ }
+ }
+
+ schedule_update_rports(ha);
+
+ } else if (type == ENABLE_RSP) {
+ enable_rsp = (struct enable_rsp *)msg;
+ ha->stats.fc_stats.enable_resp_cnt++;
+ vp = (int)enable_rsp->vp_index;
+ dprintk(TRC_INFO, vhba,
+ "Got enable rsp: vp_index %d, res_id %Lx for ha\n",
+ vp, enable_rsp->resource_id);
+
+ for (i = 0; i < MAX_VHBAS; i++) {
+ if (vhba->cfg && vhba->ha &&
+ (vhba->resource_id == enable_rsp->resource_id)) {
+ dprintk(TRC_INFO, vhba,
+ "Setting vp_index %d for ha\n", vp);
+ vhba->ha->vp_index = enable_rsp->vp_index;
+ break;
+ }
+ }
+ } else if (type == PLINK_STATUS_UPDATE) {
+ dprintk(TRC_CQP, vhba, "got plink status update\n");
+ link_status_msg = (struct vhba_link_status *)msg;
+ if (link_status_msg->phy_link_status == LINK_DOWN) {
+ dprintk(TRC_INFO, vhba,
+ "received link down msg from iop\n");
+ ha->stats.fc_stats.link_dn_cnt++;
+ if (atomic_read(&ha->link_state) == LINK_UP) {
+ atomic_set(&ha->link_state, LINK_DOWN);
+ vhba_set_all_tgts_offline(vhba);
+ } else {
+ dprintk(TRC_INFO, vhba,
+ "vhba already in link down state\n");
+ }
+ } else if (link_status_msg->phy_link_status == LINK_DEAD) {
+ atomic_set(&ha->link_state, LINK_DEAD);
+ ha->stats.fc_stats.link_dead_cnt++;
+ dprintk(TRC_INFO, vhba, "vhba link dead state\n");
+ } else {
+ ha->stats.fc_stats.link_up_cnt++;
+ }
+
+ } else {
+ eprintk(vhba, "Unknown msg from IOP\n");
+ }
+ /*
+ * Not every message type queues the work item; free it here
+ * if it was never submitted to the workqueue.
+ */
+ if (!work_submitted)
+ kfree(xwork);
+}
+
+static inline struct cont_a64_entry *vhba_prep_cont_type1_iocb(struct
+ virtual_hba
+ *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct cont_a64_entry *cont_pkt;
+
+ if (!ha) {
+ eprintk(NULL, "null ha context\n");
+ return NULL;
+ }
+
+ /* Adjust ring index. */
+ ha->req_ring_windex++;
+ if (ha->req_ring_windex == ha->request_q_length) {
+ ha->req_ring_windex = 0;
+ ha->request_ring_ptr = ha->request_ring;
+ } else
+ ha->request_ring_ptr++;
+
+ cont_pkt = (struct cont_a64_entry *)ha->request_ring_ptr;
+
+ /* Load packet defaults. */
+ cont_pkt->entry_type = CONTINUE_A64_TYPE;
+
+ return cont_pkt;
+}
+
+static inline void
+vhba_build_scsi_iocbs(struct srb *sp, struct cmd_type_7 *cmd_pkt, u16 tot_dsds)
+{
+ struct scsi_xg_vhba_host *ha = sp->ha;
+ struct virtual_hba *vhba = ha->vhba;
+ struct scsi_cmnd *cmd;
+ u16 avail_dsds;
+ u32 *cur_dsd;
+ u32 *rkey;
+ u32 rindex;
+ u32 sp_index;
+ u64 *page_list = NULL;
+ u64 mapped_addr;
+ u32 *cur_dsd_len;
+ int unaligned_io = 0;
+ int ret;
+ u32 request_bufflen = scsi_bufflen(sp->cmd);
+
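+ /* Mask that clears the in-page offset bits, e.g. ~0xfff for 4 KiB pages */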
+ u64 fmr_page_mask = ~((u64) PAGE_SIZE - 1);
+
+ cmd = sp->cmd;
+
+ /* Update entry type to indicate Command Type 3 IOCB */
+ cmd_pkt->entry_type = COMMAND_TYPE_7;
+
+ /* No data transfer */
+ if (request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
+ cmd_pkt->byte_count = cpu_to_le32(0);
+ sp->ha->stats.io_stats.total_task_mgmt_reqs++;
+ dprintk(TRC_SCSI_ERRS, vhba, "Task Mgmt Req. Returning\n");
+ return;
+ }
+
+ /* Set transfer direction */
+ if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+ cmd_pkt->task_mgmt_flags =
+ cpu_to_le16(TMF_WRITE_DATA);
+ ha->stats.io_stats.total_write_reqs++;
+ ha->stats.io_stats.total_write_mbytes += cmd_pkt->byte_count;
+ } else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+ cmd_pkt->task_mgmt_flags =
+ cpu_to_le16(TMF_READ_DATA);
+ ha->stats.io_stats.total_read_reqs++;
+ ha->stats.io_stats.total_read_mbytes += cmd_pkt->byte_count;
+ }
+
+ /* One DSD is available in the Command Type 3 IOCB */
+ cmd_pkt->rkey1 = 0;
+ cmd_pkt->rkey2 = 0;
+ cmd_pkt->rkey3 = 0;
+ cmd_pkt->rkey4 = 0;
+ cmd_pkt->rkey5 = 0;
+
+ avail_dsds = 1;
+ cur_dsd = (u32 *) &(cmd_pkt->dseg_0_address);
+ cur_dsd_len = (u32 *) &(cmd_pkt->dseg_0_len);
+ rkey = (u32 *) &(cmd_pkt->rkey1);
+ rindex = 0;
+ sp_index = 0;
+ sp->tot_dsds = tot_dsds;
+
+ /* Load data segments */
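+ /*
+ * For SG commands each element is broken into page-aligned chunks
+ * collected in page_list[], which is then mapped through the FMR
+ * pool so the IOP sees one virtually contiguous region; the IOCB
+ * therefore carries a single DSD (address/length/rkey) instead of
+ * one DSD per scatterlist element.
+ */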
+ if (scsi_sg_count(cmd) != 0) {
+ struct scatterlist *cur_seg;
+ int mapped_len = 0;
+ int remaining_length = 0;
+ int first_pg_offset = 0;
+ int cntr = 0;
+ int t_cntr = 0;
+ u64 cur_map_ptr = 0;
+ int pg_list_cntr = 0;
+
+ dprintk(TRC_IO, vhba,
+ "hndl %d: Scatter Gather list used\n",
+ (int)cmd_pkt->handle);
+
+ {
+ ha->stats.fmr_stats.total_fmr_ios++;
+
+ cur_seg = scsi_sglist(cmd);
+ dprintk(TRC_FMR, vhba,
+ "SG tot_dsds %d. using FMR...\n", tot_dsds);
+
+ page_list = kmalloc(sizeof(u64) *
+ ((request_bufflen /
+ PAGE_SIZE) +
+ (2 * tot_dsds)), GFP_ATOMIC);
+
+ dprintk(TRC_FMR, vhba,
+ "allocated %d address ptrs for fmr list\n",
+ (int)((request_bufflen / PAGE_SIZE) +
+ (2 * tot_dsds)));
+ if (!page_list) {
+ eprintk(vhba, "alloc failed!\n");
+ sp->error_flag = 1;
+ return;
+ }
+
+ mapped_len = 0;
+
+ for (cntr = 0; cntr < tot_dsds; cntr++) {
+ if (pg_list_cntr > vhba_max_dsds_in_fmr) {
+ eprintk(vhba,
+ "%s: page list ptrs exceed %d!\n",
+ __func__, vhba_max_dsds_in_fmr);
+ assert(0);
+ dprintk(TRC_FMR, vhba,
+ "freeing pg_list\n");
+ kfree(page_list);
+ page_list = NULL;
+ sp->error_flag = 1;
+ return;
+ }
+ remaining_length =
+ ib_sg_dma_len(vhba->xsmp_info.ib_device,
+ cur_seg);
+ cur_map_ptr =
+ ib_sg_dma_address(vhba->xsmp_info.ib_device,
+ cur_seg) & fmr_page_mask;
+ dprintk(TRC_FMR, vhba,
+ "new dsd rem len %d ", remaining_length);
+ dprintk(TRC_FMR, vhba,
+ "cur_map_ptr %lx\n",
+ (unsigned long)cur_map_ptr);
+ if (cntr == 0) {
+ page_list[pg_list_cntr] =
+ ib_sg_dma_address(vhba->
+ xsmp_info.ib_device,
+ cur_seg) &
+ fmr_page_mask;
+ first_pg_offset =
+ (ib_sg_dma_address
+ (vhba->xsmp_info.ib_device,
+ cur_seg) -
+ page_list[pg_list_cntr]) &
+ ~fmr_page_mask;
+ remaining_length =
+ ib_sg_dma_len(vhba->
+ xsmp_info.ib_device,
+ cur_seg)
+ - (PAGE_SIZE - first_pg_offset);
+ dprintk(TRC_FMR, vhba,
+ "offset %d rem len in ",
+ first_pg_offset);
+ dprintk(TRC_FMR, vhba,
+ "dsd %d\n", remaining_length);
+ cur_map_ptr = page_list[pg_list_cntr] +
+ PAGE_SIZE;
+ pg_list_cntr++;
+ } else {
+ if ((cur_map_ptr & 0xfff) != 0) {
+ dprintk(TRC_FMR, vhba,
+ "\n%s(): Non-aligned page address = 0x%lx",
+ __func__,
+ (unsigned long)cur_map_ptr);
+ panic("Non-aligned page in middle element\n");
+ assert(0);
+ ha->stats.fmr_stats.unaligned_ptr_cnt++;
+ unaligned_io = 1;
+ }
+ }
+ while (remaining_length > 0) {
+ dprintk(TRC_FMR, vhba,
+ "rem len %d cntr %x ",
+ remaining_length, pg_list_cntr);
+ dprintk(TRC_FMR, vhba,
+ "cur_map_ptr %lx\n",
+ (unsigned long)cur_map_ptr);
+ page_list[pg_list_cntr] = cur_map_ptr;
+ remaining_length =
+ remaining_length - PAGE_SIZE;
+ cur_map_ptr += PAGE_SIZE;
+ pg_list_cntr++;
+ }
+
+ if (unaligned_io) {
+ ha->stats.fmr_stats.unaligned_io_cnt++;
+ dprintk(TRC_FMR, vhba,
+ "freeing pg_list\n");
+ kfree(page_list);
+ page_list = NULL;
+ sp->error_flag = 1;
+ return;
+ }
+
+ dprintk(TRC_FMR, vhba,
+ "final rem len %d cntr %d cur_map_ptr ",
+ remaining_length, pg_list_cntr);
+ dprintk(TRC_FMR, vhba,
+ "%lx\n",
+ (unsigned long)cur_map_ptr);
+ mapped_len +=
+ (int)ib_sg_dma_len(vhba->
+ xsmp_info.ib_device,
+ cur_seg);
+ dprintk(TRC_FMR, vhba,
+ "hndl %d: mapped len is %u\n",
+ (int)cmd_pkt->handle, mapped_len);
+ SG_NEXT(cur_seg);
+ }
+
+ for (t_cntr = 0; t_cntr < pg_list_cntr; t_cntr++)
+ dprintk(TRC_FMR, vhba,
+ "hndl %d: SG FMR: page_list[%d] = %lx\n",
+ (int)cmd_pkt->handle, t_cntr,
+ (unsigned long)page_list[t_cntr]);
+
+ mapped_addr = page_list[0];
+ dprintk(TRC_FMR, vhba,
+ "calling map buf fmr len %u cmd",
+ mapped_len);
+ dprintk(TRC_FMR, vhba,
+ " bufflen %u page_list_cntr %x mapped addr ",
+ request_bufflen, pg_list_cntr);
+ dprintk(TRC_FMR, vhba,
+ "%lx\n",
+ (unsigned long)mapped_addr);
+ dprintk(TRC_FMR, vhba,
+ "sp %lx sp_index %lx spfmr pool %lx\n",
+ (unsigned long)sp, (unsigned long)sp_index,
+ (unsigned long)sp->pool_fmr[sp_index]);
+ ret = vhba_map_buf_fmr(vhba, page_list,
+ pg_list_cntr, &mapped_addr, sp,
+ sp_index);
+ if (ret == -1) {
+ dprintk(TRC_FMR_ERRS, vhba,
+ "vhba_map_buf_fmr failed\n");
+ dprintk(TRC_FMR, vhba, "freeing pg_list\n");
+ kfree(page_list);
+ page_list = NULL;
+ sp->error_flag = 1;
+ return;
+ }
+
+ dprintk(TRC_FMR, vhba,
+ "hndl %d: SG FMR: mapped addr %llx + ",
+ (int)cmd_pkt->handle, mapped_addr);
+ dprintk(TRC_FMR, vhba,
+ "offset %d\n", first_pg_offset);
+ dprintk(TRC_FMR, vhba,
+ "hndl %d: SG FMR: len %u rkey 0x%x ",
+ (int)cmd_pkt->handle, mapped_len,
+ ((struct ib_pool_fmr *)sp->pool_fmr[sp_index])->
+ fmr->rkey);
+ dprintk(TRC_FMR, vhba, "rindex 0x%x\n", rindex);
+ mapped_addr = mapped_addr + first_pg_offset;
+ *cur_dsd++ = cpu_to_le32(LSD(mapped_addr));
+ *cur_dsd++ = cpu_to_le32(MSD(mapped_addr));
+ *cur_dsd_len = cpu_to_le32((u32) request_bufflen);
+
+ dprintk(TRC_FMR, NULL,
+ "Original SCSI request_buflen = %d 0x%x\n",
+ (u32) request_bufflen, (u32) request_bufflen);
+
+ sp->tot_dsds = 1;
+ cmd_pkt->dseg_count = cpu_to_le16(sp->tot_dsds);
+ dprintk(TRC_FMR, vhba, "done with mapping\n");
+
+ cmd_pkt->rkey1 = cpu_to_be32(((struct ib_pool_fmr *)
+ sp->
+ pool_fmr[sp_index])->fmr->
+ rkey);
+ }
+ } else {
+ dma_addr_t req_dma;
+ unsigned long offset;
+
+ dprintk(TRC_FMR, vhba,
+ "hndl %d: No Scatter Gather list used\n",
+ (int)cmd_pkt->handle);
+ offset = ((unsigned long)scsi_sglist(cmd) & ~PAGE_MASK);
+ req_dma = ib_dma_map_single(vhba->xsmp_info.ib_device,
+ (void *)scsi_sglist(cmd),
+ request_bufflen,
+ cmd->sc_data_direction);
+ sp->dma_handle = req_dma;
+
+ if (req_dma & 0x7) {
+ dprintk(TRC_ERRORS, vhba,
+ "data buff address not 8 byte aligned!\n");
+ sp->error_flag = 1;
+ ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+ sp->dma_handle, request_bufflen,
+ cmd->sc_data_direction);
+ return;
+ }
+
+ {
+ int i;
+ int num_pages;
+
+ req_dma = req_dma & fmr_page_mask;
+ offset = sp->dma_handle - req_dma;
+ sp_index = 0;
+
+ /* Get the number of pages */
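+ /*
+ * Example: a 6 KiB buffer starting 512 bytes into a 4 KiB page
+ * needs 2 pages (one full, one partial); the two "+1" adjustments
+ * below cover a partial tail and a tail that spills past a page
+ * boundary because of the starting offset.
+ */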
+ num_pages = (unsigned long)
+ request_bufflen / PAGE_SIZE;
+ if (request_bufflen % PAGE_SIZE)
+ num_pages += 1;
+
+ if ((offset + (request_bufflen % PAGE_SIZE)) >
+ PAGE_SIZE)
+ num_pages += 1;
+
+ page_list = kmalloc(sizeof(u64) *
+ num_pages, GFP_ATOMIC);
+ if (!page_list) {
+ eprintk(vhba, "Page alloc failed!\n");
+ /* Drop the DMA mapping taken above before bailing out */
+ ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+ sp->dma_handle, request_bufflen,
+ cmd->sc_data_direction);
+ sp->error_flag = 1;
+ return;
+ }
+
+ for (i = 0; i < num_pages; i++) {
+ page_list[i] = sp->dma_handle + (PAGE_SIZE * i);
+ page_list[i] &= fmr_page_mask;
+ }
+ mapped_addr = page_list[0];
+
+ ret = vhba_map_buf_fmr(vhba, page_list, num_pages,
+ &mapped_addr, sp, sp_index);
+
+ if (ret == -1) {
+ dprintk(TRC_ERRORS, vhba,
+ "vhba_map_buf_fmr failed\n");
+ kfree(page_list);
+ page_list = NULL;
+ sp->error_flag = 1;
+ return;
+ }
+
+ dprintk(TRC_FMR, vhba,
+ "no sg: hndl %d: NSG FMR: req_dma %llx",
+ (int)cmd_pkt->handle,
+ (unsigned long long int)req_dma);
+ dprintk(TRC_FMR, vhba,
+ " mapped addr %llx + offset %lu\n",
+ mapped_addr, offset);
+ mapped_addr += offset;
+ rkey[rindex] = cpu_to_be32(((struct ib_pool_fmr *)
+ sp->
+ pool_fmr[sp_index])->fmr->
+ rkey);
+ *cur_dsd++ = cpu_to_le32(LSD(mapped_addr));
+ *cur_dsd++ = cpu_to_le32(MSD(mapped_addr));
+ *cur_dsd_len = cpu_to_le32((u32) request_bufflen);
+
+ dprintk(TRC_FMR, NULL,
+ "Original SCSI request_buflen = %d 0x%x\n",
+ (u32) request_bufflen, (u32) request_bufflen);
+
+ dprintk(TRC_FMR, vhba,
+ "no sg: hndl %d: NSG FMR: mapped addr",
+ (int)cmd_pkt->handle);
+ dprintk(TRC_FMR, vhba,
+ " 0x%llx len 0x%x rkey 0x%x rindex 0x%x\n",
+ mapped_addr, request_bufflen,
+ ((struct ib_pool_fmr *)sp->pool_fmr[sp_index])->
+ fmr->rkey, rindex);
+
+ }
+ }
+
+ kfree(page_list);
+ page_list = NULL;
+}
+
+static void sense_buffer(struct scsi_cmnd *cmd, int key, int asc, int asq)
+{
+ u8 *sbuff;
+
+ sbuff = cmd->sense_buffer;
+ memset(sbuff, 0, SCSI_SENSE_BUFFERSIZE);
+ sbuff[0] = 0x70; /* fixed, current */
+ sbuff[2] = key;
+ sbuff[7] = 0xa; /* implies 18 byte sense buffer */
+ sbuff[12] = asc;
+ sbuff[13] = asq;
+
+ dprintk(TRC_SCSI_ERRS, NULL, "[sense_key,asc,ascq]: [0x%x,0x%x,0x%x]\n",
+ key, asc, asq);
+}
+
+int vhba_report_luns_cmd(struct srb *sp, u32 t, u32 l)
+{
+ struct scatterlist *sg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ struct scsi_cmnd *cmd;
+ struct xg_scsi_lun *lun;
+ struct os_tgt *tq;
+ unsigned long flags = 0;
+ int ret = 0;
+ int i;
+ u16 lun_cnt;
+ int lun_byte;
+ int rsp_byte;
+ int total_size;
+ int copy_len;
+ char *buf;
+ char *data_ptr;
+ u8 *cdb;
+ int alloc_len;
+ int req_len;
+ int act_len;
+ u32 request_bufflen = scsi_bufflen(sp->cmd);
+
+ cmd = sp->cmd;
+ ha = sp->ha;
+ cdb = cmd->cmnd;
+ vhba = ha->vhba;
+
+ dprintk(TRC_FUNCS, vhba, "Entering...\n");
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+
+ /* Check allocation length and select report */
+ alloc_len = cdb[9] + (cdb[8] << 8) + (cdb[7] << 16) + (cdb[6] << 24);
+ if ((alloc_len < 16) || (cdb[2] > 2)) {
+ sense_buffer(cmd, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+ ret = 1;
+ goto err;
+ }
+
+ /* Check reserved bit */
+ if (cdb[1] || cdb[3] || cdb[4] || cdb[5] || cdb[10]) {
+ sense_buffer(cmd, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+ cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+ ret = 1;
+ goto err;
+ }
+
+ tq = TGT_Q(ha, t);
+ lun_cnt = tq->fcport->lun_count;
+ lun_byte = lun_cnt * sizeof(struct xg_scsi_lun);
+ rsp_byte = (lun_cnt + 1) * sizeof(struct xg_scsi_lun);
+
+ /* Calculate actual length */
+ req_len = request_bufflen;
+ scsi_set_resid(cmd, 0);
+ if (alloc_len < req_len) {
+ act_len = alloc_len;
+ scsi_set_resid(cmd, req_len - alloc_len);
+ } else {
+ act_len = req_len;
+ scsi_set_resid(cmd, alloc_len - req_len);
+ }
+ dprintk(TRC_SCSI, vhba, "req_len=%d, alloc_len=%d, act_len=%d, ",
+ req_len, alloc_len, act_len);
+
+ if (rsp_byte > act_len) {
+ rsp_byte = act_len;
+ lun_cnt = act_len / sizeof(struct xg_scsi_lun);
+ if (lun_cnt > 0)
+ lun_cnt--;
+ else
+ lun_cnt = 0;
+ dprintk(TRC_SCSI, vhba,
+ "Truncate response buffer, " "lun_cnt=%d\n", lun_cnt);
+ }
+ dprintk(TRC_SCSI, vhba, "Total number of luns active = %d\n", lun_cnt);
+
+ lun = kmalloc(rsp_byte, GFP_ATOMIC);
+ if (!lun) {
+ dprintk(TRC_SCSI, vhba, "Fail to allocate memory\n");
+ cmd->result = DID_ERROR << 16;
+ ret = 1;
+ goto err;
+ }
+ memset(lun, 0, rsp_byte);
+
+ /* Create the header. */
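+ /*
+ * Bytes 0-3 hold the LUN LIST LENGTH (lun_byte, the byte count of
+ * the LUN entries that follow) in big-endian order, matching the
+ * SCSI REPORT LUNS response format.
+ */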
+ lun[0].scsi_lun[0] = (lun_byte >> 24) & 0xff;
+ lun[0].scsi_lun[1] = (lun_byte >> 16) & 0xff;
+ lun[0].scsi_lun[2] = (lun_byte >> 8) & 0xff;
+ lun[0].scsi_lun[3] = (lun_byte >> 0) & 0xff;
+
+ /* Create data */
+ for (i = 1; i <= lun_cnt; i++) {
+ lun[i].scsi_lun[0] = ((tq->fcport->lun_ids[i - 1] >> 8) & 0xff);
+ lun[i].scsi_lun[1] = (tq->fcport->lun_ids[i - 1] & 0xff);
+ lun[i].scsi_lun[2] = 0;
+ lun[i].scsi_lun[3] = 0;
+ lun[i].scsi_lun[4] = 0;
+ lun[i].scsi_lun[5] = 0;
+ lun[i].scsi_lun[6] = 0;
+ lun[i].scsi_lun[7] = 0;
+ }
+
+ /* Data copy */
+ if (scsi_sg_count(cmd)) {
+ data_ptr = (u8 *) &(lun[0]);
+ total_size = rsp_byte;
+ sg = scsi_sglist(cmd);
+ dprintk(TRC_SCSI, vhba, "S/G list, num_sg=%d, buf_len=%d\n",
+ scsi_sg_count(cmd), request_bufflen);
+ dprintk(TRC_SCSI, vhba, "total response size = 0x%x\n",
+ total_size);
+
+ while (total_size > 0) {
+ unsigned int sg_offset = SG_OFFSET(sg);
+ unsigned int sg_length = SG_LENGTH(sg);
+
+ if (total_size > (sg_length - sg_offset))
+ copy_len = sg_length - sg_offset;
+ else
+ copy_len = total_size;
+
+ dprintk(TRC_SCSI, vhba,
+ "sg_len=0x%x, sg_offset=0x%x, ",
+ sg_length, sg_offset);
+ dprintk(TRC_SCSI, vhba, "copy_len=0x%x\n",
+ copy_len);
+
+ buf = page_address(sg_page(sg)) + sg_offset;
+ if (!buf) {
+ ret = 1;
+ goto err_2;
+ }
+ memcpy(buf, data_ptr, copy_len);
+
+ total_size -= copy_len;
+ if (total_size > 0) {
+ dprintk(TRC_SCSI, vhba,
+ "More data 0x%x\n", total_size);
+ data_ptr += copy_len;
+ SG_NEXT(sg);
+ }
+ }
+ SG_RESET(sg);
+ } else if (request_bufflen) {
+ dprintk(TRC_SCSI, vhba, "Single buffer size=0x%x\n",
+ request_bufflen);
+ memcpy(scsi_sglist(cmd), (void *)lun, rsp_byte);
+ }
+ cmd->result = DID_OK << 16;
+err_2:
+ kfree(lun);
+err:
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ return ret;
+}
+
+int vhba_start_scsi(struct srb *sp, u32 tgt, u32 lun, u32 handle)
+{
+ struct cmd_type_7 *cmd_pkt;
+ struct scsi_xg_vhba_host *ha = sp->ha;
+ struct virtual_hba *vhba = ha->vhba;
+ struct scsi_cmnd *cmd = sp->cmd;
+ struct os_tgt *tq;
+ struct scatterlist *sg;
+ int tot_dsds;
+ int req_cnt, i;
+ u16 lcl_timeout;
+ u32 request_bufflen = scsi_bufflen(cmd);
+
+ dprintk(TRC_FUNCS, NULL, "Entering...\n");
+
+ sp->unaligned_sg = NULL;
+ sp->bounce_buffer = NULL;
+ if (scsi_sg_count(cmd) && (sp->cmd->sc_data_direction != DMA_NONE)) {
+ if (check_sg_alignment(sp, scsi_sglist(cmd))) {
+ sp->unaligned_sg = vhba_setup_bounce_buffer(sp);
+ if (!sp->unaligned_sg) {
+ pr_err("Error: unable to setup bounce buffr\n");
+ sp->error_flag = 1;
+ return 1;
+ }
+ ha->stats.fmr_stats.unaligned_io_cnt++;
+ }
+ }
+
+ /*
+ * Enqueue srb in the outstanding commands
+ * Check if marker is needed
+ */
+ tot_dsds = 0;
+ sg = NULL;
+
+ if (scsi_sg_count(cmd)) {
+ sg = scsi_sglist(cmd);
+ tot_dsds = ib_dma_map_sg(vhba->xsmp_info.ib_device,
+ sg, scsi_sg_count(cmd),
+ cmd->sc_data_direction);
+ } else if (request_bufflen)
+ tot_dsds++;
+
+ req_cnt = 1;
+
+ if (req_cnt > MAX_IOCBS_IN_VH) {
+ eprintk(vhba,
+ "Total IOCBS %d > max val %d with ",
+ req_cnt, MAX_IOCBS_IN_VH);
+ eprintk(vhba, "total dsds %d\n", tot_dsds);
+ goto queuing_error;
+ }
+
+ if (tot_dsds > vhba_max_dsds_in_fmr) {
+ eprintk(vhba, "Total DSDs %d > %d\n",
+ tot_dsds, (int)vhba_max_dsds_in_fmr);
+ goto queuing_error;
+ }
+
+ if (((ha->req_ring_windex + 1) % 1024) == *ha->req_ring_rindex) {
+ dprintk(TRC_IO, NULL, "Queue full\n");
+ goto queuing_error;
+ }
+
+ /* Make sure there is place for all IOCBS in the ring... */
+ cmd_pkt = (struct cmd_type_7 *)ha->request_ring_ptr;
+
+ memset(cmd_pkt, 0, sizeof(struct cmd_type_7));
+
+ cmd_pkt->handle = handle;
+ sp->iocb_handle = handle;
+ if (vhba_multiple_q)
+ cmd_pkt->handle = cmd_pkt->handle | (sp->queue_num << 16);
+
+ sp->cmd->host_scribble =
+ (unsigned char *)(unsigned long)cmd_pkt->handle;
+ ha->req_q_cnt -= req_cnt;
+
+ tq = TGT_Q(ha, tgt);
+ cmd_pkt->nport_handle = cpu_to_le16(tq->fcport->loop_id);
+ dprintk(TRC_IO, vhba, "NPORT hndl is 0x%x\n", cmd_pkt->nport_handle);
+
+ cmd_pkt->port_id[0] = tq->d_id.b.al_pa;
+ dprintk(TRC_IO, vhba, "PORT ID byte 0 is 0x%x\n", cmd_pkt->port_id[0]);
+ cmd_pkt->port_id[1] = tq->d_id.b.area;
+ dprintk(TRC_IO, vhba, "PORT ID byte 1 is 0x%x\n", cmd_pkt->port_id[1]);
+ cmd_pkt->port_id[2] = tq->d_id.b.domain;
+ dprintk(TRC_IO, vhba, "PORT ID byte 2 is 0x%x\n", cmd_pkt->port_id[2]);
+
+ cmd_pkt->dseg_count = cpu_to_le16(tot_dsds);
+
+ cmd_pkt->lun[1] = LSB(lun);
+ cmd_pkt->lun[2] = MSB(lun);
+ host_to_fcp_swap(cmd_pkt->lun, sizeof(cmd_pkt->lun));
+
+ dprintk(TRC_IO, vhba, "hndl %d: cdb buffer dump:\n",
+ (int)cmd_pkt->handle);
+ if (vhba_debug == TRC_IO) {
+ for (i = 0; i < cmd->cmd_len; i++)
+ dprintk(TRC_IO, vhba, "%x ", cmd->cmnd[i]);
+ dprintk(TRC_IO, vhba, "\n");
+ }
+
+ memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len);
+ host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb));
+
+ /*
+ * timeout_per_command(cmd) is the timeout value
+ * for the cmd from SCSI and is in milliseconds
+ * so divide by 1000 to get in secs
+ */
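+ /*
+ * Example: a 30 s SCSI timeout (30000 ms) is scaled to a 24 s
+ * IOCB timeout (80%), so the remote side gives up before the
+ * SCSI midlayer's own timer fires.
+ */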
+
+ if ((timeout_per_command(cmd) / 1000) > 5) {
+ lcl_timeout =
+ (u16) (((timeout_per_command(cmd) / 1000) * 8) / 10);
+ } else if ((timeout_per_command(cmd) / 1000) >= 2)
+ lcl_timeout = (u16) ((timeout_per_command(cmd) / 1000) - 1);
+ else if ((timeout_per_command(cmd) / 1000) == 1)
+ lcl_timeout = 1;
+ else
+ lcl_timeout = VHBA_CMD_TIMEOUT;
+
+ cmd_pkt->timeout = cpu_to_le16(lcl_timeout);
+ dprintk(TRC_IO, vhba, "sp = %p, scsi_pkt_timeout = %d\n",
+ sp, timeout_per_command(cmd));
+ cmd_pkt->byte_count = cpu_to_le32((u32) request_bufflen);
+ dprintk(TRC_IO, vhba, "hndl %d: byte cnt x%0x, lcl_timeout:0x%x\n",
+ (int)cmd_pkt->handle, cmd_pkt->byte_count, lcl_timeout);
+
+ vhba_build_scsi_iocbs(sp, cmd_pkt, tot_dsds);
+
+ if (sp->error_flag) {
+ if (scsi_sg_count(cmd))
+ ib_dma_unmap_sg(vhba->xsmp_info.ib_device,
+ sg, scsi_sg_count(cmd),
+ cmd->sc_data_direction);
+ return 1;
+ }
+
+ cmd_pkt->vp_index = ha->vp_index;
+
+ if (cmd_pkt->byte_count != cpu_to_le32((u32) request_bufflen)) {
+ dprintk(TRC_IO, vhba,
+ "hndl %d: byte cnt %d != req buff len %d\n",
+ (int)cmd_pkt->handle, cmd_pkt->byte_count,
+ cpu_to_le32((u32) request_bufflen));
+ }
+
+ if (req_cnt != 1)
+ dprintk(TRC_IO, vhba, "curr entry cnt is %d\n", req_cnt);
+ cmd_pkt->entry_count = 1;
+
+ sp->flags |= SRB_DMA_VALID;
+
+ /* Adjust ring index and send a write index update... */
+ ha->req_ring_windex++;
+ if (ha->req_ring_windex == REQUEST_ENTRY_CNT_24XX) {
+ ha->req_ring_windex = 0;
+ ha->request_ring_ptr = ha->request_ring;
+ } else
+ ha->request_ring_ptr++;
+
+ if (vhba_send_write_index(vhba)) {
+ dprintk(TRC_ERRORS, vhba, "send write index failed\n");
+ sp->flags &= ~SRB_DMA_VALID;
+ if (scsi_sg_count(sp->cmd)) {
+ ib_dma_unmap_sg(vhba->xsmp_info.ib_device,
+ scsi_sglist(sp->cmd),
+ scsi_sg_count(sp->cmd),
+ sp->cmd->sc_data_direction);
+ } else if (request_bufflen) {
+ ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+ sp->dma_handle,
+ request_bufflen,
+ sp->cmd->sc_data_direction);
+ }
+ vhba_unmap_buf_fmr(vhba, sp, sp->tot_dsds);
+
+ return 1;
+ }
+
+ return 0;
+
+queuing_error:
+ if (scsi_sg_count(cmd))
+ ib_dma_unmap_sg(vhba->xsmp_info.ib_device, sg,
+ scsi_sg_count(cmd), cmd->sc_data_direction);
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "Cannot queue req as IOCB to ring (err2)\n");
+ return 1;
+}
+
+int vhba_send_abort(struct virtual_hba *vhba, int abort_handle, int t)
+{
+ struct vhba_abort_cmd *abort_msg = NULL;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq = NULL;
+ int ret = 0;
+
+ tq = TGT_Q(ha, t);
+
+ if (!tq) {
+ eprintk(vhba, "null tq context in vhba_send_abort\n");
+ return 2;
+ }
+
+ abort_msg = kmalloc(sizeof(struct vhba_abort_cmd), GFP_ATOMIC);
+ if (!abort_msg) {
+ eprintk(vhba, "kmalloc failed for send xsmp abort\n");
+ return 1;
+ }
+
+ abort_msg->type = ABORT_CMD;
+ abort_msg->handle_to_abort = abort_handle;
+ abort_msg->port_id[0] = tq->d_id.b.al_pa;
+ abort_msg->port_id[1] = tq->d_id.b.area;
+ abort_msg->port_id[2] = tq->d_id.b.domain;
+
+ dprintk(TRC_INFO, vhba,
+ "sending abort msg for handle %x p0 %x p1 %x p2 %x\n",
+ abort_handle, abort_msg->port_id[0],
+ abort_msg->port_id[1], abort_msg->port_id[2]);
+
+ /* check qp status */
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED)
+ ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+ (u8 *) abort_msg,
+ sizeof(struct vhba_abort_cmd),
+ XSCORE_DEFER_PROCESS);
+ else {
+ dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+ kfree(abort_msg);
+ return VHBA_QP_DISCONNECTED;
+ }
+
+ if (ret) {
+ ha->stats.ib_stats.cqp_send_err_cnt++;
+ eprintk(vhba, "xsigo ib send msg failed [%d]\n", ret);
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ kfree(abort_msg);
+ return 1;
+ }
+
+ return 0;
+}
+
+int vhba_send_tgt_reset(struct virtual_hba *vhba, int t)
+{
+ struct vhba_tgt_reset_msg *reset_msg = NULL;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ int ret = 0;
+ int i;
+
+ tq = TGT_Q(ha, t);
+
+ /*
+ * TODO: there should be a mechanism to check whether the otgt
+ * array has been fully populated. This is a simple check in the
+ * meantime.
+ */
+ if (!tq) {
+ pr_err("null tq context in vhba_send_tgt_reset\n");
+ return 2;
+ }
+
+ reset_msg = kmalloc(sizeof(struct vhba_tgt_reset_msg), GFP_ATOMIC);
+ if (!reset_msg) {
+ eprintk(NULL, "kmalloc failed for send xsmp abort\n");
+ return 1;
+ }
+
+ reset_msg->type = TGT_RESET;
+ reset_msg->vhba_id = ha->vp_index;
+
+ for (i = 0; i < WWN_SIZE; i++)
+ reset_msg->wwpn[i] = tq->port_name[i];
+
+ /* check qp status */
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+ dprintk(TRC_INFO, vhba,
+ "sending tgt reset msg for vhba %p\n", vhba);
+ ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+ (u8 *) reset_msg,
+ sizeof(struct vhba_tgt_reset_msg),
+ XSCORE_DEFER_PROCESS);
+ } else {
+ dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+ kfree(reset_msg);
+ return VHBA_QP_DISCONNECTED;
+ }
+
+ if (ret) {
+ ha->stats.ib_stats.cqp_send_err_cnt++;
+ eprintk(vhba, "xsigo ib send msg failed?\n");
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ kfree(reset_msg);
+ return 1;
+ }
+
+ return 0;
+}
+
+int vhba_send_lun_reset(struct virtual_hba *vhba, int t, int l)
+{
+ struct vhba_lun_reset_msg *reset_msg = NULL;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ struct os_lun *lq;
+ int ret = 0;
+ int i;
+
+ tq = TGT_Q(ha, t);
+
+ /*
+ * TODO: there should be a mechanism to check whether the otgt
+ * array has been fully populated. This is a simple check in the
+ * meantime.
+ */
+ if (!tq) {
+ pr_err("null tq context in vhba_send_lun_reset\n");
+ return 2;
+ }
+
+ lq = LUN_Q(ha, t, l);
+ if (!lq) {
+ pr_err("null lq context in vhba_send_lun_reset\n");
+ return 3;
+ }
+
+ reset_msg = kmalloc(sizeof(struct vhba_lun_reset_msg), GFP_ATOMIC);
+ if (!reset_msg) {
+ eprintk(NULL, "kmalloc failed for send xsmp lun reset\n");
+ return 1;
+ }
+
+ reset_msg->type = LUN_RESET;
+ reset_msg->vhba_id = ha->vp_index;
+ reset_msg->lun = (u16) l;
+
+ for (i = 0; i < WWN_SIZE; i++)
+ reset_msg->wwpn[i] = tq->port_name[i];
+
+ /* check qp status */
+ if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+ dprintk(TRC_INFO, vhba,
+ "sending lun reset msg for vhba %p\n", vhba);
+ ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+ (u8 *) reset_msg,
+ sizeof(struct vhba_lun_reset_msg),
+ XSCORE_DEFER_PROCESS);
+ } else {
+ dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+ kfree(reset_msg);
+ return VHBA_QP_DISCONNECTED;
+ }
+
+ if (ret) {
+ ha->stats.ib_stats.cqp_send_err_cnt++;
+ eprintk(vhba, "xsocre_post_send() failed?\n");
+ ib_link_down(ha);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ kfree(reset_msg);
+ return 1;
+ }
+ return 0;
+}
+
+struct os_lun *vhba_allocate_lun(struct virtual_hba *vhba, u32 tgt, u32 lun)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_lun *lq;
+ u32 max_lun;
+
+ if (vhba->cfg->lunmask_enable)
+ max_lun = MAX_FIBRE_LUNS;
+ else
+ max_lun = MAX_FIBRE_LUNS_MORE;
+
+ /* If SCSI addressing OK, allocate LUN queue. */
+ if (tgt >= MAX_TARGETS || lun >= max_lun) {
+ eprintk(vhba,
+ "scsi(%ld): Unable to allocate lun, invalid ",
+ ha->host_no);
+ eprintk(vhba, "parameters %d %d. Returning null\n",
+ tgt, lun);
+ return NULL;
+ }
+
+ if (TGT_Q(ha, tgt) == NULL) {
+ eprintk(vhba, "Tgt %d not found in tgt_q\n", tgt);
+ return NULL;
+ }
+
+ lq = LUN_Q(ha, tgt, lun);
+ if (lq == NULL) {
+ lq = kmalloc(sizeof(struct os_lun), GFP_ATOMIC);
+ if (lq != NULL) {
+ dprintk(TRC_IO, vhba,
+ "scsi(%ld): Alloc Lun %d @ tgt %d\n",
+ ha->host_no, lun, tgt);
+
+ memset(lq, 0, sizeof(struct os_lun));
+ LUN_Q(ha, tgt, lun) = lq;
+ }
+ }
+
+ if (lq == NULL) {
+ eprintk(vhba, "Unable to allocate lun\n");
+ return NULL;
+ }
+
+ return lq;
+}
+
+static struct os_tgt *vhba_tgt_alloc(struct virtual_hba *vhba, u32 tgt)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+
+ /* If SCSI addressing OK, allocate TGT queue and lock. */
+ if (tgt >= MAX_TARGETS) {
+ eprintk(vhba,
+ "scsi(%ld): Unable to allocate", ha->host_no);
+ eprintk(vhba,
+ " target, invalid target number %d. Returning null\n",
+ tgt);
+ return NULL;
+ }
+
+ tq = TGT_Q(ha, tgt);
+ if (tq == NULL) {
+ tq = kmalloc(sizeof(struct os_tgt), GFP_ATOMIC);
+ if (tq != NULL) {
+ dprintk(TRC_IO, vhba,
+ "scsi(%ld): Alloc Target %d @ %p\n",
+ ha->host_no, tgt, tq);
+ memset(tq, 0, sizeof(struct os_tgt));
+ tq->ha = ha;
+ tq->init_done = 0;
+ TGT_Q(ha, tgt) = tq;
+ tq->state = VHBA_IO_STATE_ACTIVE;
+ }
+ }
+
+ if (tq != NULL) {
+ tq = TGT_Q(ha, tgt);
+ if (tq)
+ dprintk(TRC_IO, vhba, "tq is same as TGT_Q\n");
+ else
+ dprintk(TRC_IO, vhba, "tq is not same as TGT_Q\n");
+ } else
+ eprintk(vhba, "Unable to allocate target\n");
+
+ return tq;
+}
+
+static u32
+vhba_target_bind(struct virtual_hba *vhba, u32 loop_id, u8 *node_name,
+ u8 *port_name, u32 port_id, s32 bound_value,
+ u32 lun_count, u8 *lun_map, u16 *lun_id, u8 media_type)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ struct fc_port *fcport;
+ struct fc_port *fcporttemp;
+ unsigned long flags;
+ u32 tgt;
+ int port_found;
+ int k, id;
+
+ port_found = 0;
+ spin_lock_irqsave(&ha->list_lock, flags);
+
+ list_for_each_entry_safe(fcport, fcporttemp, &ha->disc_ports, list) {
+ if (memcmp(port_name, fcport->port_name, WWN_SIZE) == 0) {
+ port_found = 1;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&ha->list_lock, flags);
+
+ if (port_found) {
+ /*
+ * Port must be already bound at a particular location
+ * Just set the state and flags
+ */
+ dprintk(TRC_IO, NULL,
+ "port already exists, so just updating info\n");
+ fcport->d_id.b24 = port_id;
+ fcport->loop_id = loop_id;
+ fcport->lun_count = lun_count;
+ if (fcport->tgt_queue) {
+ fcport->tgt_queue->d_id.b24 = fcport->d_id.b24;
+ set_bit(TQF_ONLINE, &fcport->tgt_queue->flags);
+ }
+ if (media_type == TYPE_TAPE)
+ fcport->flags |= FCF_TAPE_PRESENT;
+ else
+ fcport->flags &= ~FCF_TAPE_PRESENT;
+ atomic_set(&fcport->state, FCS_ONLINE);
+ return 0;
+ }
+
+ fcport = kmalloc(sizeof(struct fc_port), GFP_ATOMIC);
+
+ if (!fcport) {
+ eprintk(vhba, "Couldn't allocate fcport\n");
+ return 1;
+ }
+ memset(fcport, 0, sizeof(struct fc_port));
+ fcport->loop_id = loop_id;
+ fcport->lun_count = lun_count;
+ fcport->supported_classes |= FC_COS_CLASS3;
+
+ for (k = 0; k < lun_count; k++)
+ fcport->lun_ids[k] = -1;
+
+ for (k = 0; k < lun_count; k++) {
+ if (lun_id) {
+ dprintk(TRC_IO, vhba,
+ "Adding lun id %d to list\n", lun_id[k]);
+ fcport->lun_ids[k] = lun_id[k];
+ } else {
+ dprintk(TRC_IO, vhba,
+ "Setting lun id %d to 0 in list\n", lun_id[k]);
+ fcport->lun_ids[k] = 0;
+ }
+ }
+
+ id = fcport->loop_id;
+ dprintk(TRC_IO, vhba, "fcport loop id:%d\n", id);
+ fcport->d_id.b24 = port_id;
+
+ memcpy(fcport->port_name, port_name, WWN_SIZE);
+ memcpy(fcport->node_name, node_name, WWN_SIZE);
+ fcport->persistent_binding = bound_value;
+
+ add_to_disc_ports(fcport, vhba);
+
+ /*
+ * Check for persistent binding: a bound_value other than -1 must
+ * name a valid target slot (0 <= bound_value < MAX_TARGETS).
+ */
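+ /*
+ * Example: a port provisioned with binding 5 is always placed at
+ * TGT_Q(ha, 5); ports with binding -1 stay unbound here and are
+ * assigned the next free slot later by vhba_map_unbound_targets().
+ */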
+ tgt = (u32) bound_value;
+ if (bound_value != -1) {
+ tq = TGT_Q(ha, tgt);
+ if (tq == NULL) {
+ tq = vhba_tgt_alloc(vhba, tgt);
+ if (tq != NULL) {
+ memcpy(tq->node_name, fcport->node_name,
+ WWN_SIZE);
+ memcpy(tq->port_name, fcport->port_name,
+ WWN_SIZE);
+ tq->d_id.b24 = fcport->d_id.b24;
+ fcport->bound = 1;
+ fcport->os_target_id = tgt;
+ fcport->tgt_queue = tq;
+ tq->fcport = fcport;
+ if (media_type == TYPE_TAPE)
+ fcport->flags |= FCF_TAPE_PRESENT;
+ else
+ fcport->flags &= ~FCF_TAPE_PRESENT;
+ set_bit(TQF_ONLINE, &tq->flags);
+ atomic_set(&fcport->state, FCS_ONLINE);
+ if (ha->max_tgt_id < tgt)
+ ha->max_tgt_id = tgt;
+ } else {
+ id = fcport->loop_id;
+ fcport->bound = 0;
+ eprintk(vhba,
+ "Unable to allocate tgt");
+ eprintk(vhba, " for fc_port %d\n", id);
+ return 1;
+ }
+ } else {
+ id = fcport->loop_id;
+ eprintk(vhba,
+ "Cannot bind fc_port %d at tgt %d\n",
+ id, tgt);
+ /* Make the port unbound which will be
+ added later to the map */
+ fcport->bound = 0;
+ return 1;
+ }
+ } else {
+ /* Make the port unbound which will be added later to the map */
+ fcport->bound = 0;
+ if (media_type == TYPE_TAPE)
+ fcport->flags |= FCF_TAPE_PRESENT;
+ else
+ fcport->flags &= ~FCF_TAPE_PRESENT;
+ }
+ return 0;
+}
+
+void vhba_set_tgt_count(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ int tgt;
+
+ ha->target_count = 0;
+ ha->max_tgt_id = 0;
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (tq != NULL) {
+ if (atomic_read(&tq->fcport->state) == FCS_ONLINE) {
+ dprintk(TRC_INFO, vhba,
+ "tgt[%d]: nport_id: 0x%x\n",
+ tgt, tq->d_id.b24);
+ ha->target_count++;
+ if (ha->max_tgt_id < tgt)
+ ha->max_tgt_id = tgt;
+ }
+ }
+ }
+
+ if (ha->target_count > 0)
+ ha->max_tgt_id++;
+
+ if (ha->max_tgt_id < ha->target_count)
+ ha->max_tgt_id = ha->target_count;
+
+ ha->max_targets = ha->max_tgt_id;
+ dprintk(TRC_INFO, vhba,
+ "RSCN: max id = %d max targets = %d tgt count = %d\n",
+ ha->max_tgt_id, ha->max_targets, ha->target_count);
+}
+
+static u32 vhba_map_unbound_targets(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ struct fc_port *fcport;
+ struct fc_port *fcporttemp;
+ u32 tgt;
+ int id;
+ int free_tgt_found = 0;
+
+ list_for_each_entry_safe(fcport, fcporttemp, &ha->disc_ports, list) {
+ if (fcport->bound)
+ continue;
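+ /*
+ * Find an unused slot in the target map for this unbound port,
+ * starting at max_tgt_id and wrapping around once before
+ * giving up.
+ */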
+ tgt = ha->max_tgt_id;
+ while (free_tgt_found == 0) {
+ tq = TGT_Q(ha, tgt);
+ if (tq == NULL) {
+ free_tgt_found = 1;
+ break;
+ }
+ tgt++;
+ if (tgt == ha->max_tgt_id)
+ break;
+ if (tgt > MAX_TARGETS)
+ tgt = 0;
+ }
+ if (free_tgt_found == 0) {
+ dprintk(TRC_SCSI_ERRS, vhba, "Tgt map is full\n");
+ return 1;
+ }
+ free_tgt_found = 0;
+ tq = vhba_tgt_alloc(vhba, tgt);
+ if (tq != NULL) {
+ memcpy(tq->node_name, fcport->node_name, WWN_SIZE);
+ memcpy(tq->port_name, fcport->port_name, WWN_SIZE);
+ tq->d_id.b24 = fcport->d_id.b24;
+ fcport->bound = 1;
+ fcport->os_target_id = tgt;
+ fcport->tgt_queue = tq;
+ tq->fcport = fcport;
+ set_bit(TQF_ONLINE, &tq->flags);
+ atomic_set(&fcport->state, FCS_ONLINE);
+ } else {
+ id = fcport->loop_id;
+ eprintk(vhba, "alloc failed for fc_port %x" "\n", id);
+ return 1;
+ }
+ }
+ vhba_set_tgt_count(vhba);
+ return 0;
+}
+
+void vhba_mark_tgts_lost(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ struct os_tgt *tq;
+ u16 tgt;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (tq == NULL)
+ continue;
+ set_bit(TQF_SUSPENDED, &tq->flags);
+ atomic_set(&tq->fcport->state, FCS_DEVICE_LOST);
+ }
+}
+
+void ib_link_dead_poll(struct scsi_xg_vhba_host *ha)
+{
+ struct virtual_hba *vhba = ha->vhba;
+
+ if (atomic_read(&ha->ib_status) != VHBA_IB_DOWN)
+ return;
+
+ if (atomic_read(&ha->ib_link_down_cnt)) {
+ if (!atomic_dec_and_test(&ha->ib_link_down_cnt))
+ return;
+ } else
+ return;
+
+ atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+ dprintk(TRC_INFO, vhba, "Marking IB link dead\n");
+}
+
+void ib_link_down(struct scsi_xg_vhba_host *ha)
+{
+ struct virtual_hba *vhba = ha->vhba;
+ struct vhba_xsmp_msg *msg;
+ u32 ib_timeout;
+
+ if (atomic_read(&ha->ib_status) != VHBA_IB_UP)
+ return;
+
+ msg = (struct vhba_xsmp_msg *)vhba->cfg;
+
+ ib_timeout = msg->linkdowntimeout;
+
+ if (ib_timeout > 60)
+ ib_timeout = 60;
+ dprintk(TRC_INFO, vhba, "IB down, timer=%d\n", ib_timeout);
+
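+ /*
+ * A configured timeout below 5 s marks the link dead immediately;
+ * otherwise the timeout becomes a countdown of WQ_PERIODIC_TIMER-
+ * sized ticks that ib_link_dead_poll() consumes before declaring
+ * the link dead.
+ */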
+ if (ib_timeout < 5) {
+ atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+ } else {
+ atomic_set(&ha->ib_status, VHBA_IB_DOWN);
+ atomic_set(&ha->ib_link_down_cnt,
+ ib_timeout / WQ_PERIODIC_TIMER);
+ }
+}
+
+void dump_iocb(struct cmd_type_7 *cmd_pkt)
+{
+
+ pr_alert("IOCB Data:\n");
+ pr_alert("Entry Type: 0x%x\tEntry Count: 0x%x\n",
+ cmd_pkt->entry_type, cmd_pkt->entry_count);
+ pr_alert("IOCB Handle : 0x%x\n", cmd_pkt->handle);
+ pr_alert("N_Port Handle: 0x%x\n", cmd_pkt->nport_handle);
+ pr_alert("Data Segment Count: 0x%x\tFCP_LUN: 0x%x\n",
+ cmd_pkt->dseg_count, cmd_pkt->lun[0]);
+ pr_alert("Task (Operation): 0x%x\tTotal Data Byte Count: 0x%x\n",
+ cmd_pkt->task_mgmt_flags, cmd_pkt->byte_count);
+ pr_alert("Target ID (Port ID): [0]: 0x%x\t[1]: 0x%x\t[2]: 0x%x\n",
+ cmd_pkt->port_id[0], cmd_pkt->port_id[1], cmd_pkt->port_id[2]);
+ pr_alert("VP Index: 0x%x\tData Segment Length: 0x%x\n",
+ cmd_pkt->vp_index, cmd_pkt->dseg_0_len);
+ pr_alert("Data Segment Address: 0x%x_%x\n",
+ cmd_pkt->dseg_0_address[1], cmd_pkt->dseg_0_address[0]);
+ pr_alert("\n");
+}
+
+/*
+ * Used by SAN boot.
+ * Returns 1 if at least one disc is up.
+ * Returns 0 if no discs are ready.
+ */
+int vhba_check_discs_status(void)
+{
+ struct virtual_hba *vhba = NULL;
+
+ read_lock_bh(&vhba_global_lock);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ if (vhba->ha->discs_ready_flag) {
+ read_unlock_bh(&vhba_global_lock);
+ dprintk(TRC_ERRORS, vhba,
+ "vhba_check_discs_status: found 1 disc Up\n");
+ return 1;
+ }
+ }
+ read_unlock_bh(&vhba_global_lock);
+ dprintk(TRC_ERRORS, vhba, "vhba_check_discs_status:No disc is Up\n");
+ return 0;
+}
+EXPORT_SYMBOL(vhba_check_discs_status);
+
+/*
+ * Used by SAN boot.
+ * Returns # of VHBAs created.
+ */
+
+int check_number_of_vhbas_provisioned(void)
+{
+ return atomic_read(&vhba_count);
+}
+EXPORT_SYMBOL(check_number_of_vhbas_provisioned);
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * vhba_main.c
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "vhba_os_def.h"
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+
+#include "xscore.h"
+#include "vhba_xsmp.h"
+#include "xsmp_session.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define DRIVER_VERSION "0.5.1"
+#else
+#define DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+#define DRIVER_VERSION_STRING "Xsigo Virtual HBA Driver for Linux v" \
+ DRIVER_VERSION
+#define VHBA_MAJOR 0
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN VHBA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+/***********************************
+ * Module parameters: starts here *
+ ***********************************/
+
+int cmds_per_lun = 16;
+module_param(cmds_per_lun, int, S_IRWXU);
+
+int vhba_multiple_q = 1;
+module_param(vhba_multiple_q, int, S_IRWXU);
+
+int vhba_max_transfer_size = VHBA_DEFAULT_TRANSFER_SIZE;
+module_param(vhba_max_transfer_size, int, S_IRWXU);
+
+int vhba_max_q_depth = VHBA_MAX_VH_Q_DEPTH;
+module_param(vhba_max_q_depth, int, S_IRWXU);
+
+int vhba_debug = 0x200001;
+module_param(vhba_debug, int, 0644);
+
+int vhba_max_scsi_retry = VHBA_MAX_SCSI_RETRY;
+module_param(vhba_max_scsi_retry, int, S_IRWXU);
+
+int vhba_default_scsi_timeout = VHBA_DEFAULT_SCSI_TIMEOUT;
+module_param(vhba_default_scsi_timeout, int, S_IRWXU);
+
+int vhba_wait_in_boot = 1;
+module_param(vhba_wait_in_boot, int, 0644);
+
+int vhba_wait_per_vhba = 30;
+module_param(vhba_wait_per_vhba, int, 0644);
+
+int vhba_abort_recovery_count = 15; /* 15*2 = 30 seconds */
+module_param(vhba_abort_recovery_count, int, 0644);
+
+/****************************************
+ * Module parameters: Ends here *
+ ****************************************/
+struct scsi_transport_template *vhba_transport_template;
+unsigned long vhba_wait_time;
+char vhba_version_str[40];
+int vhba_xsmp_service_id;
+int vhba_max_dsds_in_fmr;
+int vhba_max_fmr_pages;
+
+DEFINE_IDR(vhba_idr_table);
+
+u32 vhba_current_idr = MAX_VHBAS;
+atomic_t vhba_count;
+rwlock_t vhba_global_lock;
+
+struct virtual_hba vhba_g;
+struct vhba_xsmp_stats vhba_xsmp_stats;
+
+static const struct file_operations vhba_fops = {
+ .read = vhba_read,
+ .write = vhba_write,
+ .open = vhba_open,
+ .release = vhba_release,
+};
+
+int vhba_wait_all_vhbas_up(void)
+{
+ int time, delayms = 200;
+ int vhba_count = 0;
+
+ /* Wait up to vhba_wait_per_vhba seconds (30 by default) */
+ dprintk(TRC_INIT, NULL, "%s Checking VHBA's state\n", __func__);
+
+ for (time = 0; time < vhba_wait_per_vhba * 1000; time += delayms) {
+ vhba_count = check_number_of_vhbas_provisioned();
+ if (vhba_count > 0) {
+ dprintk(TRC_INIT, NULL, "%s Found %d vhbas\n",
+ __func__, vhba_count);
+ break;
+ }
+ msleep(delayms);
+ }
+
+ if (vhba_count <= 0) {
+ dprintk(TRC_INIT, NULL, "%s Found 0 vhbas\n", __func__);
+
+ return 0;
+ }
+
+ /* Wait for 100 seconds */
+ for (time = 0; time < 500; time++) {
+ if (vhba_check_discs_status()) {
+ dprintk(TRC_INIT, NULL, "%s Found disc status\n",
+ __func__);
+ return 1;
+ }
+ msleep(delayms);
+ }
+
+ return 0;
+}
+
+static void vhba_wait_for_vhbas(void)
+{
+ unsigned long wait_time = jiffies;
+
+ if (vhba_wait_in_boot && xscore_wait_for_sessions(0)) {
+ pr_info("XSVHBA: Waiting for VHBA's to come up .....\n");
+ if (vhba_wait_all_vhbas_up()) {
+ dprintk(TRC_INIT, NULL,
+ "%s VHBA's are ready with discs\n", __func__);
+ } else {
+ dprintk(TRC_INIT, NULL,
+ "%s VHBA's are NOT ready with discs\n",
+ __func__);
+ }
+ }
+ vhba_wait_time = jiffies - wait_time;
+}
+
+int dev_major;
+
+/*
+ * vhba_module_init - Module initialization.
+ */
+static int __init vhba_module_init(void)
+{
+ dprintk(TRC_INIT, NULL, "%s\n", DRIVER_VERSION_STRING);
+ dprintk(TRC_INIT, NULL, "Driver queue depth is %d\n", cmds_per_lun);
+ dprintk(TRC_INIT, NULL, "Driver max transfer size is %dKB\n",
+ vhba_max_transfer_size / 2);
+ dprintk(TRC_INIT,
+ NULL, "\nBuild date: " __DATE__ " @ " __TIME__ "\n\n");
+
+ /* Probably needs to be added to the regular linux driver */
+ vhba_transport_template =
+ fc_attach_transport(&vhba_transport_functions);
+
+ vhbawq_init();
+ vhbawq_queue();
+
+ rwlock_init(&vhba_global_lock);
+ INIT_LIST_HEAD(&vhba_g.list);
+
+ /* Register with XCPM module for receiving XSMP messages */
+ if (vhba_register_xsmp_service()) {
+ eprintk(NULL, "vhba_register_xsmp_service() failed!\n");
+ goto init_failed;
+ }
+
+ if (vhba_create_procfs_root_entries()) {
+ eprintk(NULL, "vhba_create_procfs_root_entries() failed!\n");
+ vhba_unregister_xsmp_service();
+ goto init_failed;
+ }
+
+ /* register a character interface here... */
+ dev_major = register_chrdev(VHBA_MAJOR, "svhba", &vhba_fops);
+
+ if (dev_major < 0) {
+ dprintk(TRC_ERRORS,
+ NULL, "char device registration failed for vhba\n");
+ eprintk(NULL, "register chrdev() failed\n");
+ vhba_unregister_xsmp_service();
+ vhba_remove_procfs_root_entries();
+ goto init_failed;
+ }
+ /* Wait for vhba's to come up */
+ vhba_wait_for_vhbas();
+ return 0;
+
+init_failed:
+ fc_release_transport(vhba_transport_template);
+ return -1;
+}
+
+/*
+ * vhba_module_exit - Module cleanup routine.
+ */
+static void __exit vhba_module_exit(void)
+{
+ struct virtual_hba *vhba;
+ struct virtual_hba *tmp_vhba;
+
+ vhba_unregister_xsmp_service();
+
+ vhbawq_cleanup();
+
+ list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+ if (vhba->cfg)
+ wake_up_interruptible(&vhba->timer_wq);
+ vhba_delete(vhba->resource_id);
+ }
+ vhba_remove_procfs_root_entries();
+
+ if (dev_major >= 0)
+ unregister_chrdev(dev_major, "svhba");
+
+ fc_release_transport(vhba_transport_template);
+
+ dprintk(0, NULL, "Xsigo Virtual HBA driver is unloaded\n");
+}
+
+ssize_t vhba_read(struct file *filp, char *buf, size_t size, loff_t *offp)
+{
+ return 0;
+}
+
+ssize_t vhba_write(struct file *filp, const char *buf, size_t size,
+ loff_t *offp)
+{
+ return 0;
+}
+
+int vhba_open(struct inode *inode, struct file *filp)
+{
+ int minor;
+
+ minor = MINOR(inode->i_rdev);
+ return 0;
+}
+
+int vhba_release(struct inode *inode, struct file *filp)
+{
+ int minor;
+
+ minor = MINOR(inode->i_rdev);
+ return 0;
+}
+
+/*
+ * Called from thread context
+ */
+static void vhba_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+ struct virtual_hba *vhba, *tmp_vhba;
+ unsigned long flags = 0;
+
+ switch (event) {
+ case XSCORE_CONN_CONNECTED:
+ read_lock_bh(&vhba_global_lock);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl))
+ vhba->xsmp_hndl = xsmp_hndl;
+ }
+ read_unlock_bh(&vhba_global_lock);
+ break;
+ case XSCORE_DEVICE_REMOVAL:
+ read_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+ if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+ read_unlock_irqrestore(&vhba_global_lock,
+ flags);
+ (void)vhba_delete(vhba->resource_id);
+ read_lock_irqsave(&vhba_global_lock, flags);
+ }
+ }
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+ break;
+ /* At present we don't need to worry about any other cases */
+ case XSCORE_PORT_UP:
+ case XSCORE_PORT_DOWN:
+ default:
+ break;
+ }
+}
+
+int vhba_register_xsmp_service(void)
+{
+ struct xsmp_service_reg_info service_info = {
+ .receive_handler = vhba_receive_handler,
+ .event_handler = vhba_xsmp_event_handler,
+ .ctrl_message_type = XSMP_MESSAGE_TYPE_VHBA,
+ .resource_flag_index = RESOURCE_FLAG_INDEX_VHBA
+ };
+
+ vhba_xsmp_service_id = xcpm_register_service(&service_info);
+ if (vhba_xsmp_service_id < 0) {
+ eprintk(NULL, "Unable to register with XCPM\n");
+ return -1;
+ }
+ return 0;
+}
+
+void vhba_unregister_xsmp_service(void)
+{
+ int ret = 0;
+
+ ret = xcpm_unregister_service(vhba_xsmp_service_id);
+ if (ret != 0)
+ eprintk(NULL, "Unable to unregister from XCPM %d\n", ret);
+ else
+ dprintk(TRC_INIT, NULL, "Completed xcpm unregister\n");
+}
+
+module_init(vhba_module_init);
+module_exit(vhba_module_exit);
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_OSDEF_H__
+#define __VHBA_OSDEF_H__
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/dmapool.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_eh.h>
+
+#include <scsi/scsi_transport_fc.h>
+
+#include <rdma/ib_verbs.h>
+#include "xscore.h"
+#include "xsmp_common.h"
+
+#define xg_spin_lock_irqsave(lock, flags) \
+ spin_lock_irqsave(lock, flags)
+
+#define xg_spin_unlock_irqrestore(lock, flags) \
+ spin_unlock_irqrestore(lock, flags)
+
+#define xg_spin_lock_irq(lock) \
+ spin_lock_irq(lock)
+
+#define xg_spin_unlock_irq(lock) \
+ spin_unlock_irq(lock)
+
+#define IB_WC_MSG_LEN (128+8)
+
+#define VHBA_DEFAULT_SCSI_TIMEOUT 60 /* 60 seconds */
+#define EXTEND_CMD_TIMEOUT 80 /* 80 seconds */
+#define IB_CMD_TIMEOUT_DELTA 3 /* 3 seconds */
+#define DEFER_LIST_TIMEOUT 70 /* 70 seconds */
+#define WQ_PERIODIC_TIMER 5 /* 5 seconds */
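+/* Number of WQ_PERIODIC_TIMER ticks that make up DEFER_LIST_TIMEOUT */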
+#define PERIODIC_DEFER_CNT (DEFER_LIST_TIMEOUT/WQ_PERIODIC_TIMER)
+
+#define VHBA_MAX_SCSI_RETRY 60
+#define FORCE_FLUSH_DEFE_LIST 1
+#define NO_FORCE_FLUSH_DEFE_LIST 0
+
+#define INVALID_FIELD_IN_CDB 0x24
+
+#define VHBA_STATE_NOT_ACTIVE 0
+#define VHBA_STATE_ACTIVE 1
+#define VHBA_STATE_SCAN 2
+#define VHBA_STATE_BUSY 3
+
+#define VHBA_QID_ENABLE 1
+#define VHBA_MAX_VH_Q_DEPTH 16
+#define VHBA_MAX_VH_Q_COUNT 4
+
+/* SCSI maximum CDB size */
+#define MAX_CMDSZ 16
+#define MAX_OUTSTANDING_COMMANDS 1024
+#define MAX_IO_DESCRIPTORS 32
+#define MAX_FIBRE_TARGETS 128
+#define MAX_FIBRE_LUNS 256
+#define MAX_FIBRE_LUNS_MORE 256
+
+#define MAX_BUSES 1
+#define MAX_TARGETS MAX_FIBRE_TARGETS
+#define MAX_VHBA_QUEUES 4
+
+#define REQUEST_ENTRY_CNT_24XX 1024 /* Number of request entries */
+
+/*
+ * Status entry SCSI status bit definitions
+ */
+/* Reserved bits BIT_12-BIT_15 */
+#define SS_MASK 0xfff
+#define SS_RESIDUAL_UNDER BIT_11
+#define SS_RESIDUAL_OVER BIT_10
+#define SS_SENSE_LEN_VALID BIT_9
+#define SS_RESPONSE_INFO_LEN_VALID BIT_8
+
+#define SS_RESERVE_CONFLICT (BIT_4 | BIT_3)
+#define SS_BUSY_CONDITION BIT_3
+#define SS_CONDITION_MET BIT_2
+#define SS_CHECK_CONDITION BIT_1
+
+/*
+ * Status entry completion status
+ */
+#define CS_COMPLETE 0x0 /* No errors */
+#define CS_INCOMPLETE 0x1 /* Incomplete transfer of cmd */
+#define CS_DMA 0x2 /* A DMA direction error. */
+#define CS_TRANSPORT 0x3 /* Transport error */
+#define CS_RESET 0x4 /* SCSI bus reset occurred */
+#define CS_ABORTED 0x5 /* System aborted command */
+#define CS_TIMEOUT 0x6 /* Timeout error */
+#define CS_DATA_OVERRUN 0x7 /* Data overrun */
+
+#define CS_DATA_UNDERRUN 0x15 /* Data Underrun */
+#define CS_QUEUE_FULL 0x1C /* Queue Full */
+#define CS_PORT_UNAVAILABLE 0x28 /* Port unavailable */
+ /* (selection timeout) */
+#define CS_PORT_LOGGED_OUT 0x29 /* Port Logged Out */
+#define CS_PORT_CONFIG_CHG 0x2A /* Port Configuration Changed */
+#define CS_PORT_BUSY 0x2B /* Port Busy */
+#define CS_COMPLETE_CHKCOND 0x30 /* Error? */
+#define CS_BAD_PAYLOAD 0x80 /* Driver defined */
+#define CS_UNKNOWN 0x81 /* Driver defined */
+#define CS_RETRY 0x82 /* Driver defined */
+#define CS_LOOP_DOWN_ABORT 0x83 /* Driver defined */
+
+#define WWN_SIZE 8
+
+#define LINK_DOWN 0
+#define LINK_UP 1
+#define LINK_DEAD 2
+
+#define TGT_LOST 1
+#define TGT_FOUND 0
+#define TGT_DEAD 2
+
+#define LUN_ID_SCHEME
+
+struct xt_cm_private_data {
+ u64 vid;
+ u16 qp_type;
+ u16 max_ctrl_msg_size;
+ u32 data_qp_type;
+} __packed;
+
+struct xg_scsi_lun {
+ u8 scsi_lun[8];
+};
+
+struct _vhba_init_ {
+ u8 port_id[3];
+ u8 vp_index;
+ u16 n_port_handle;
+ u16 lun;
+};
+
+struct vhba_io_cmd {
+ u8 cmd[MAX_CMDSZ];
+ u32 cmd_len;
+ u8 *buf[6];
+ u32 buf_size[6];
+};
+
+#define ADD_VHBA 1
+#define DELETE_VHBA 2
+#define SEND_NOP 3
+#define SEND_DISK_READ 4
+#define SEND_DISK_WRITE 5
+#define SET_LID 6
+
+union target_id {
+ u16 extended;
+ struct {
+ u8 reserved;
+ u8 standard;
+ } id;
+};
+
+#define COMMAND_TYPE_7 0x18
+struct cmd_type_7 {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 sys_define; /* System defined */
+ u8 entry_status; /* Entry Status */
+
+ u32 handle; /* System handle */
+
+ u16 nport_handle; /* N_PORT handle */
+ u16 timeout; /* Command timeout */
+#define FW_MAX_TIMEOUT 0x1999
+
+ u16 dseg_count; /* Data segment count */
+ u16 reserved_1;
+
+ u8 lun[8]; /* FCP LUN (BE) */
+
+ u16 task_mgmt_flags; /* Task management flags */
+
+#define TMF_CLEAR_ACA BIT_14
+#define TMF_TARGET_RESET BIT_13
+#define TMF_LUN_RESET BIT_12
+#define TMF_CLEAR_TASK_SET BIT_10
+#define TMF_ABORT_TASK_SET BIT_9
+#define TMF_READ_DATA BIT_1
+#define TMF_WRITE_DATA BIT_0
+
+ u8 task;
+#define TSK_SIMPLE 0
+#define TSK_HEAD_OF_QUEUE 1
+#define TSK_ORDERED 2
+#define TSK_ACA 4
+#define TSK_UNTAGGED 5
+
+ u8 crn;
+
+ u8 fcp_cdb[MAX_CMDSZ]; /* SCSI command words */
+ u32 byte_count; /* Total byte count */
+
+ u8 port_id[3]; /* PortID of destination port */
+ u8 vp_index;
+
+ u32 dseg_0_address[2]; /* Data segment 0 address */
+ u32 dseg_0_len; /* Data segment 0 length */
+
+ u32 rkey1; /* Xg extensions to IOCBS */
+ u32 rkey2; /* to accommodate */
+ u32 rkey3; /* rkeys for dsds */
+ u32 rkey4;
+ u32 rkey5;
+
+ u32 xg_rsvd[11];
+};
+
+#define CONTINUE_A64_TYPE 0x0A /* Continuation A64 entry */
+struct cont_a64_entry {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 sys_define; /* System defined */
+ u8 entry_status; /* Entry Status */
+ u32 dseg_0_address[2]; /* Data segment 0 address */
+ u32 dseg_0_length; /* Data segment 0 length */
+ u32 dseg_1_address[2]; /* Data segment 1 address */
+ u32 dseg_1_length; /* Data segment 1 length */
+ u32 dseg_2_address[2]; /* Data segment 2 address */
+ u32 dseg_2_length; /* Data segment 2 length */
+ u32 dseg_3_address[2]; /* Data segment 3 address */
+ u32 dseg_3_length; /* Data segment 3 length */
+ u32 dseg_4_address[2]; /* Data segment 4 address */
+ u32 dseg_4_length; /* Data segment 4 length */
+
+ u32 rkey1; /* Xg extensions to IOCBS */
+ u32 rkey2; /* to accommodate */
+ u32 rkey3; /* rkeys for dsds */
+ u32 rkey4;
+ u32 rkey5;
+
+ u32 xg_rsvd[11];
+};
+
+#define STATUS_TYPE 0x03 /* Status entry */
+struct sts_entry_24xx {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 sys_define; /* System defined */
+ u8 entry_status; /* Entry Status */
+
+ u32 handle; /* System handle */
+
+ u16 comp_status; /* Completion status */
+ u16 ox_id; /* OX_ID used by the firmware */
+
+ u32 residual_len; /* Residual transfer length */
+
+ u16 reserved_1;
+ u16 state_flags; /* State flags */
+#define SF_TRANSFERRED_DATA BIT_11
+#define SF_FCP_RSP_DMA BIT_0
+
+ u16 reserved_2;
+ u16 scsi_status; /* SCSI status */
+#define SS_CONFIRMATION_REQ BIT_12
+
+ u32 rsp_residual_count; /* FCP RSP residual count */
+
+ u32 sense_len; /* FCP SENSE length */
+ u32 rsp_data_len; /* FCP response data length */
+
+ u8 data[28]; /* FCP response/sense information */
+};
+
+/*
+* Status entry completion status
+*/
+#define CS_DATA_REASSEMBLY_ERROR 0x11 /* Data Reassembly Error */
+#define CS_ABTS_BY_TARGET 0x13 /* Target sent ABTS to abort IOCB */
+#define CS_FW_RESOURCE 0x2C /* Firmware Resource Unavailable */
+#define CS_TASK_MGMT_OVERRUN 0x30 /* Task management overrun (8+) */
+#define CS_ABORT_BY_TARGET 0x47 /* Abort By Target */
+
+#define STATUS_CONT_TYPE 0x10 /* Status continuation entry */
+struct sts_cont_entry {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 sys_define; /* System defined */
+ u8 entry_status; /* Entry Status */
+ u8 data[60]; /* data */
+};
+
+#define MARKER_TYPE 0x04 /* Marker entry */
+struct mrk_entry_24xx {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 handle_count; /* Handle count */
+ u8 entry_status; /* Entry Status */
+
+ u32 handle; /* System handle */
+
+ u16 nport_handle; /* N_PORT handle */
+
+ u8 modifier; /* Modifier (7-0) */
+#define MK_SYNC_ID_LUN 0 /* Synchronize ID/LUN */
+#define MK_SYNC_ID 1 /* Synchronize ID */
+#define MK_SYNC_ALL 2 /* Synchronize all ID/LUN */
+ u8 reserved_1;
+
+ u8 reserved_2;
+ u8 vp_index;
+
+ u16 reserved_3;
+
+ u8 lun[8]; /* FCP LUN (BE) */
+ u8 reserved_4[40];
+};
+
+struct response {
+ u8 data[60];
+ u32 signature;
+#define RESPONSE_PROCESSED 0xDEADDEAD /* Signature */
+};
+
+#define ABORT_IOCB_TYPE 0x33
+struct abort_entry_24xx {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 handle_count; /* Handle count */
+ u8 entry_status; /* Entry Status */
+
+ u32 handle; /* System handle */
+
+ u16 nport_handle; /* N_PORT handle */
+ /* or Completion status */
+
+ u16 options; /* Options */
+#define AOF_NO_ABTS BIT_0 /* Do not send any ABTS */
+
+ u32 handle_to_abort; /* System handle to abort */
+
+ u8 reserved_1[32];
+
+ u8 port_id[3]; /* PortID of destination port */
+ u8 vp_index;
+
+ u8 reserved_2[12];
+};
+
+#define TSK_MGMT_IOCB_TYPE 0x14
+struct tsk_mgmt_entry {
+ u8 entry_type; /* Entry type */
+ u8 entry_count; /* Entry count */
+ u8 handle_count; /* Handle count */
+ u8 entry_status; /* Entry Status */
+
+ u32 handle; /* System handle */
+
+ u16 nport_handle; /* N_PORT handle */
+
+ u16 reserved_1;
+
+ u16 delay; /* Activity delay in seconds */
+
+ u16 timeout; /* Command timeout */
+
+ u8 lun[8]; /* FCP LUN (BE) */
+
+ u32 control_flags; /* Control Flags */
+#define TCF_NOTMCMD_TO_TARGET BIT_31
+#define TCF_LUN_RESET BIT_4
+#define TCF_ABORT_TASK_SET BIT_3
+#define TCF_CLEAR_TASK_SET BIT_2
+#define TCF_TARGET_RESET BIT_1
+#define TCF_CLEAR_ACA BIT_0
+
+ u8 reserved_2[20];
+
+ u8 port_id[3]; /* PortID of destination port */
+ u8 vp_index;
+
+ u8 reserved_3[12];
+};
+
+struct scsi_xg_vhba_host;
+
+#define MAX_VHBAS 32
+
+/* Messages on Data QP */
+#define INIT_BLOCK 0x1
+#define WRITE_INDEX_UPDATE 0x2
+#define RING_UPDATE 0x5
+
+#define READ_INDEX_UPDATE 0x7
+
+/* Messages on Control QP */
+#define ENABLE_VHBA_Q 0x1
+#define DISABLE_VHBA_Q 0x2
+#define TGT_RESET 0x3
+#define LINK_RESET 0x4
+#define ABORT_CMD 0x5
+#define LUN_RESET 0x6
+
+#define ENABLE_RSP 0x7
+#define DISC_INFO_UPDATE 0x8
+#define DISC_INFO_CONT_UPDATE 0x9
+#define PLINK_STATUS_UPDATE 0xA
+#define TGT_STATUS_UPDATE 0xB
+#define VHBA_HEART_BEAT 0x13 /* 0xC ~ 0x12 for FC HBA API */
+
+/* 24 bit FC port id ... */
+union port_id {
+ u32 b24:24;
+
+ struct {
+ u8 d_id[3];
+ u8 rsvd_1;
+ } r;
+
+ struct {
+ u8 al_pa;
+ u8 area;
+ u8 domain;
+ u8 rsvd_1;
+ } b;
+};
+
+/* Ring related structures ... */
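+/*
+ * The INIT_BLOCK message carries the request ring geometry (entry size and
+ * ring size) plus the base/read-index addresses and the rkeys used to
+ * access them.
+ */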
+struct init_block {
+ u8 type;
+ u8 entry_size;
+ u16 ring_size;
+ u32 _reserved;
+
+ u32 read_index_rkey;
+ u32 base_addr_rkey;
+
+ u64 read_index_addr;
+ u64 base_addr;
+};
+
+struct enable_msg {
+ u8 type;
+ u8 rsvd;
+ u8 rsvd1;
+ u8 rsvd2;
+ u64 resource_id;
+};
+
+struct heart_beat_msg {
+ u8 type;
+ u8 rsvd;
+ u8 rsvd1;
+ u8 rsvd2;
+ u64 resource_id;
+};
+
+struct enable_rsp {
+ u8 type;
+ u8 rsvd;
+ u8 rsvd1;
+ u8 vp_index;
+ u32 rsvd2;
+ u64 resource_id;
+};
+
+struct vhba_link_status {
+ u8 type;
+ u8 _reserved1;
+ u16 phy_link_status;
+ u32 conn_down_timeout;
+};
+
+struct tgt_info {
+ u16 lun_count;
+ u16 loop_id;
+ u32 persistent_binding;
+ u32 port_id;
+ u8 media_type;
+ u8 _reserved[3];
+ u8 wwpn[WWN_SIZE];
+ u8 lun_map[MAX_FIBRE_LUNS >> 3];
+ u16 lun_ids[MAX_FIBRE_LUNS];
+ u8 wwnn[WWN_SIZE];
+};
+
+struct vhba_discovery_msg {
+ u8 type;
+ u8 _reserved1;
+ u16 queue_number;
+ u16 target_count;
+ u16 cont_count;
+ /* Tgts (at most 1 struct tgt_info) */
+ struct tgt_info tgt_data[1];
+ u32 fcid;
+};
+
+struct vhba_discovery_cont_msg {
+ u8 type;
+ u8 seg_num;
+ u16 target_count;
+ /* Tgts (at most 1 struct tgt_info) */
+ struct tgt_info tgt_data[1];
+};
+
+struct vhba_write_index_msg {
+ u8 type;
+ u8 _reserved1;
+ u16 write_index;
+ u32 _reserved;
+};
+
+struct vhba_tgt_status_msg {
+ u8 type;
+ u8 media_type;
+ u8 rscn_addr_format;
+ u8 flag;
+ u16 loop_id;
+ u16 _reserved3;
+ u8 wwpn[WWN_SIZE];
+ u32 port_id;
+ u32 persistent_binding;
+ u16 lun_count;
+ u16 _reserved4;
+ u8 lun_map[MAX_FIBRE_LUNS >> 3];
+ u16 lun_ids[MAX_FIBRE_LUNS];
+ u8 wwnn[WWN_SIZE];
+ u32 port_down_timeout;
+};
+
+struct vhba_abort_cmd {
+ u8 type;
+ u8 _reserved1;
+ u8 _reserved2;
+ u8 _reserved3;
+ u16 vhba_id;
+ u16 _reserved4;
+ u32 handle_to_abort;
+ u8 port_id[3];
+ u8 _reserved5;
+};
+
+struct vhba_lun_reset_msg {
+ u8 type;
+ u8 _reserved1;
+ u8 _reserved2;
+ u8 flag;
+ u16 vhba_id;
+ u16 lun;
+ u8 wwpn[WWN_SIZE];
+};
+
+struct vhba_tgt_reset_msg {
+ u8 type;
+ u8 _reserved1;
+ u8 _reserved2;
+ u8 flag;
+ u16 vhba_id;
+ u16 _reserved3;
+ u8 wwpn[WWN_SIZE];
+};
+
+struct vhba_link_reset_msg {
+ u8 type;
+ u8 _reserved1;
+ u16 vhba_id;
+};
+
+#define MAX_VHBA_MSG_SIZE sizeof(struct init_block)
+#define MAX_VHBA_NAME_SIZE 16
+#define MAX_CHASSIS_NAME_SIZE 32
+#define MAX_SESSION_NAME_SIZE 32 /* Server Profile Name Size */
+
+#define BIT_0 0x1
+#define BIT_1 0x2
+#define BIT_2 0x4
+#define BIT_3 0x8
+#define BIT_4 0x10
+#define BIT_5 0x20
+#define BIT_6 0x40
+#define BIT_7 0x80
+#define BIT_8 0x100
+#define BIT_9 0x200
+#define BIT_10 0x400
+#define BIT_11 0x800
+#define BIT_12 0x1000
+#define BIT_13 0x2000
+#define BIT_14 0x4000
+#define BIT_15 0x8000
+
+#define LSB(x) ((u8)(x))
+#define MSB(x) ((u8)((u16)(x) >> 8))
+
+#define LSW(x) ((u16)(x))
+#define MSW(x) ((u16)((u32)(x) >> 16))
+
+#define LSD(x) ((u32)((u64)(x)))
+#define MSD(x) ((u32)((((u64)(x)) >> 16) >> 16))
+
+#define CMD_SP(cmnd) ((cmnd)->SCp.ptr)
+#define CMD_COMPL_STATUS(cmnd) ((cmnd)->SCp.this_residual)
+#define CMD_RESID_LEN(cmnd) ((cmnd)->SCp.buffers_residual)
+#define CMD_SCSI_STATUS(cmnd) ((cmnd)->SCp.Status)
+#define CMD_ACTUAL_SNSLEN(cmnd) ((cmnd)->SCp.Message)
+#define CMD_ENTRY_STATUS(cmnd) ((cmnd)->SCp.have_data_in)
+
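+/*
+ * Drop a reference on a virtual_hba and wake anyone sleeping on delete_wq
+ * once the last reference is gone.
+ */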
+#define DEC_REF_CNT(x) do { \
+ if (atomic_dec_and_test(&(x)->ref_cnt)) \
+ wake_up(&(x)->delete_wq); \
+ } while (0)
+
+/**
+ * host_to_fcp_swap() - byte-swap an FCP field between host and wire order
+ * @fcp: buffer to swap in place
+ * @bsize: buffer size in bytes (swapped in 32-bit words)
+ *
+ * Returns the (now swapped) buffer.
+ */
+static inline u8 *host_to_fcp_swap(u8 *fcp, u32 bsize)
+{
+ u32 *ifcp = (u32 *) fcp;
+ u32 *ofcp = (u32 *) fcp;
+ u32 iter = bsize >> 2;
+
+ for (; iter; iter--)
+ *ofcp++ = swab32(*ifcp++);
+
+ return fcp;
+}
+
+#define VHBA_IO_STATE_ACTIVE 0
+#define VHBA_IO_STATE_ABORTING 1
+#define VHBA_IO_STATE_ABORTED 2
+#define VHBA_IO_STATE_ABORT_FAILED 3
+#define VHBA_IO_STATE_ABORT_NEEDED 4
+#define VHBA_IO_STATE_TIMEDOUT 5
+#define VHBA_IO_STATE_RESET 6
+
+#define SRB_STATE_NO_DEFER_LIST 0
+#define SRB_STATE_DEFER_LIST 1
+
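+/* Per-command SCSI Request Block (SRB) context tracked by the driver */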
+struct srb {
+ struct list_head list;
+
+ struct scsi_xg_vhba_host *ha; /* HA the SP is queued on */
+ struct scsi_cmnd *cmd; /* Linux SCSI command pkt */
+ struct timer_list timer; /* Command timer */
+ u16 flags;
+
+ /* Request state */
+ u16 state;
+
+ /* Target/LUN queue pointers. */
+ struct os_tgt *tgt_queue;
+ struct os_lun *lun_queue;
+
+ /* Single transfer DMA context */
+ dma_addr_t dma_handle;
+
+ u32 request_sense_length;
+ u8 *request_sense_ptr;
+ u32 queue_num;
+
+ /* Suspend delay */
+ int delay;
+
+ u32 tot_dsds;
+
+ void *pool_fmr[6];
+
+ /* Raw completion info for use by failover ? */
+ u8 fo_retry_cnt; /* Retry count this request */
+ u8 err_id; /* error id */
+#define SRB_ERR_PORT 1 /* Request failed -- "port down" */
+#define SRB_ERR_LOOP 2 /* Request failed -- "loop down" */
+#define SRB_ERR_DEVICE 3 /* Request failed -- "device error" */
+#define SRB_ERR_OTHER 4
+
+ int iocb_handle;
+ void *unaligned_sg;
+ int use_copy;
+ void *bounce_buffer;
+ int bounce_buf_len;
+ int use_sg_orig;
+ struct scatterlist *lcl_sg;
+ int lcl_sg_cnt;
+ int abort_cnt;
+
+ u16 error_flag; /* if page_list allocation fails */
+};
+
+#define MAX_SRB_SIZE sizeof(struct srb)
+
+/*
+* SRB flag definitions
+*/
+#define SRB_TIMEOUT BIT_0 /* Command timed out */
+#define SRB_DMA_VALID BIT_1 /* Command sent to ISP */
+#define SRB_WATCHDOG BIT_2 /* Command on watchdog list */
+#define SRB_ABORT_PENDING BIT_3 /* Command abort sent to device */
+
+#define SRB_ABORTED BIT_4 /* Command aborted command already */
+#define SRB_RETRY BIT_5 /* Command needs retrying */
+#define SRB_GOT_SENSE BIT_6 /* Command has sense data */
+#define SRB_FAILOVER BIT_7 /* Command in failover state */
+
+#define SRB_BUSY BIT_8 /* Command is in busy retry state */
+#define SRB_FO_CANCEL BIT_9 /* Command don't need to do failover */
+#define SRB_IOCTL BIT_10 /* IOCTL command. */
+#define SRB_TAPE BIT_11 /* FCP2 (Tape) command. */
+
+/*
+* SRB state definitions
+*/
+#define SRB_FREE_STATE 0 /* returned back */
+#define SRB_PENDING_STATE 1 /* queued in LUN Q */
+#define SRB_ACTIVE_STATE 2 /* in Active Array */
+#define SRB_DONE_STATE 3 /* queued in Done Queue */
+#define SRB_RETRY_STATE 4 /* in Retry Queue */
+#define SRB_SUSPENDED_STATE 5 /* in suspended state */
+#define SRB_NO_QUEUE_STATE 6 /* is in between states */
+#define SRB_ACTIVE_TIMEOUT_STATE 7 /* in Active Array but timed out */
+#define SRB_FAILOVER_STATE 8 /* in Failover Queue */
+#define SRB_SCSI_RETRY_STATE 9 /* in Scsi Retry Queue */
+
+struct vhba_ib_stats {
+ u64 cqp_dn_cnt;
+ u64 cqp_up_cnt;
+ u64 cqp_send_err_cnt;
+ u64 cqp_recv_err_cnt;
+ u64 cqp_remote_disconn_cnt;
+ u64 dqp_dn_cnt;
+ u64 dqp_up_cnt;
+ u64 dqp_send_err_cnt;
+ u64 dqp_recv_err_cnt;
+ u64 dqp_remote_disconn_cnt;
+ u64 curr_outstanding_reqs;
+ u64 total_req_q_fulls;
+ u64 total_outstding_q_wraps;
+} __packed;
+
+struct vhba_xsmp_stats {
+ u64 install_msg_cnt;
+ u64 delete_msg_cnt;
+ u64 update_msg_cnt;
+ u64 cfg_stats_msg_cnt;
+ u64 clr_stats_msg_cnt;
+ u64 sync_begin_msg_cnt;
+ u64 sync_end_msg_cnt;
+ u64 oper_req_msg_cnt;
+ u64 unknown_msg_cnt;
+ u64 xt_state_dn_cnt;
+ u64 tca_lid_changed_cnt;
+ u64 abort_all_cnt;
+ u64 boot_msg_cnt;
+ u64 last_unknown_msg;
+ u64 last_msg;
+} __packed;
+
+struct vhba_io_stats {
+ u64 total_read_reqs;
+ u64 total_write_reqs;
+ u64 total_task_mgmt_reqs;
+ u64 total_read_mbytes;
+ u64 total_write_mbytes;
+ u64 total_io_rsp;
+ u64 total_copy_ios;
+ u64 total_copy_page_allocs;
+ u64 total_copy_page_frees;
+ atomic_t vh_q_full_cnt[VHBA_MAX_VH_Q_COUNT];
+ atomic_t num_vh_q_reqs[VHBA_MAX_VH_Q_COUNT];
+ u64 qcmd_busy_ret_cnt;
+} __packed;
+
+struct vhba_fmr_stats {
+ u64 map_cnt;
+ u64 unmap_cnt;
+ u64 map_fail_cnt;
+ u64 unaligned_io_cnt;
+ u64 unaligned_ptr_cnt;
+ u64 total_fmr_ios;
+} __packed;
+
+struct vhba_fc_stats {
+ u64 link_dn_cnt;
+ u64 link_dead_cnt;
+ u64 link_up_cnt;
+ u64 rscn_up_cnt;
+ u64 rscn_dn_cnt;
+ u64 rscn_dead_cnt;
+ u64 rscn_multiple_up_cnt;
+ u64 rscn_multiple_dn_cnt;
+ u64 last_up_tgt;
+ u64 last_dn_tgt;
+ u64 last_dead_tgt;
+ u64 disc_info_cnt;
+ u64 enable_resp_cnt;
+ u64 enable_msg_cnt;
+} __packed;
+
+struct vhba_scsi_stats {
+ u64 invalid_tgt_cnt;
+ u64 invalid_lun_cnt;
+ u64 abort_success_cnt;
+ u64 abort_fail_cnt;
+ u64 dev_reset_success_cnt;
+ u64 dev_reset_fail_cnt;
+ u64 bus_reset_success_cnt;
+ u64 bus_reset_fail_cnt;
+ u64 host_reset_success_cnt;
+ u64 host_reset_fail_cnt;
+} __packed;
+
+struct vhba_ha_stats {
+ struct vhba_ib_stats ib_stats;
+ struct vhba_io_stats io_stats;
+ struct vhba_fmr_stats fmr_stats;
+ struct vhba_fc_stats fc_stats;
+ struct vhba_scsi_stats scsi_stats;
+} __packed;
+
+#define VHBA_NAME_LEN 16
+#define VHBA_LVM_NAME_LEN 128
+#define VHBA_MAX_BOOT_DEV 6
+#define VHBA_MAX_MOUNT_DEV 6
+#define VHBA_MOUNT_OPT_LEN 32
+
+struct host_san_mount_lvm {
+ u8 logical_vol_group[VHBA_LVM_NAME_LEN];
+ u8 logical_vol[VHBA_LVM_NAME_LEN];
+};
+
+struct host_san_vhba_list_sts {
+ u8 vh_name[VHBA_NAME_LEN];
+ u64 wwn;
+ u16 lun;
+ u8 tgt_num; /* target number to expose */
+};
+
+union xg_tgt_wwpn {
+ u8 wwpn_t[WWN_SIZE];
+ u64 wwpn_val;
+};
+
+struct scsi_xg_vhba_host {
+ struct list_head list;
+ u8 host_str[16];
+ atomic_t vhba_flags;
+ struct vhba_ha_stats stats;
+ struct virtual_hba *vhba;
+ int vhba_num;
+
+ struct proc_dir_entry *vhba_proc;
+ struct proc_dir_entry *vhba_proc_target;
+
+ u8 *vhba_name[MAX_VHBA_NAME_SIZE];
+
+ u64 tca_guid;
+ u16 tca_lid;
+
+ /* SCSI Info */
+ struct Scsi_Host *host;
+ unsigned long host_no;
+ unsigned long instance;
+ u16 max_tgt_id;
+ u16 max_luns;
+ u16 max_targets;
+ u32 target_count;
+ struct srb *status_srb;
+ u32 lun_count;
+ struct list_head disc_ports;
+ /* OS target queue pointers */
+ struct os_tgt *otgt[MAX_FIBRE_TARGETS];
+
+ struct {
+ u32 init_done:1;
+ u32 online:1;
+ u32 reset_active:1;
+ u32 process_response_queue:1;
+ u32 enable_target_reset:1;
+ } flags;
+
+ /* Boot info */
+ u16 boot_count;
+ struct host_san_vhba_list_sts sanboot[VHBA_MAX_BOOT_DEV];
+
+ /* Mount info */
+ u16 mount_count;
+ struct host_san_vhba_list_sts sanmount[VHBA_MAX_MOUNT_DEV];
+ /* mount type: 0 = vhba, 1 = logical vol, 2 = direct mount */
+ u16 mount_type;
+
+ /* name of direct mount device: ex: /dev/sdb */
+ u8 direct_mount_dev[VHBA_LVM_NAME_LEN];
+
+ /* logical volume group and logical volume */
+ struct host_san_mount_lvm host_lvm_info;
+
+ /* mount options */
+ u8 mount_options[VHBA_MOUNT_OPT_LEN];
+
+ u8 discs_ready_flag;
+
+ /* IB Info */
+ u64 resource_id;
+ struct ib_link_info *link;
+ u32 control_qp_handle;
+ u32 control_qpn;
+ u32 data_qp_handle;
+ u32 data_qpn;
+ struct xt_cm_private_data ctrl_pvt;
+ struct xt_cm_private_data data_pvt;
+ atomic_t qp_status;
+ struct init_block init_blk;
+ struct vhba_write_index_msg *send_write_index_msg;
+
+ u32 max_cont_segs;
+
+ u8 sync_flag;
+
+ /* QL Info */
+ u32 vp_index;
+ u16 revision;
+ u8 ports;
+
+ /* FMR */
+ void *fmr_pool;
+ void *request_ring_fmr;
+ void *rindex_fmr;
+ void *scratch;
+
+ atomic_t link_state;
+ u32 device_flags;
+
+#define SRB_MIN_REQ 128
+
+ atomic_t dqp_send_buf_consumed;
+
+ /* Req ring lock, rings, and indexes */
+ dma_addr_t request_dma; /* Physical address */
+ struct cmd_type_7 *request_ring; /* Base virtual address */
+ struct cmd_type_7 *request_ring_ptr; /* Current address */
+ u16 req_ring_rindex_dummy; /* Current index */
+ s16 req_ring_windex; /* Current index */
+ u16 req_q_cnt; /* Number of available entries */
+ u16 request_q_length;
+ dma_addr_t req_ring_rindex_dma;
+ u32 *req_ring_rindex;
+
+ /* Outstanding commands */
+ struct srb *outstanding_cmds[MAX_OUTSTANDING_COMMANDS];
+ u32 current_outstanding_cmd;
+ void *send_buf_ptr[REQUEST_ENTRY_CNT_24XX];
+ struct ib_wc recv_buf_ptr[64];
+
+ /* Defer list */
+ struct list_head defer_list;
+ atomic_t defer_cnt;
+ atomic_t periodic_def_cnt;
+ atomic_t ib_link_down_cnt;
+ atomic_t ib_status;
+
+ /* Lock order: First hold host_lock before holding list_lock */
+ spinlock_t list_lock ____cacheline_aligned;
+ spinlock_t io_lock ____cacheline_aligned;
+};
+
+struct xsvhba_conn {
+ u8 type;
+ int state;
+ struct xscore_conn_ctx ctx;
+};
+
+struct xsvhba_work {
+ struct work_struct work;
+ xsmp_cookie_t xsmp_hndl;
+ struct virtual_hba *vhba;
+ u8 *msg;
+ u32 idr;
+ int len;
+ int status;
+};
+
+struct virtual_hba {
+ struct scsi_xg_vhba_host *ha;
+ struct vhba_xsmp_msg *cfg;
+ struct list_head list;
+ wait_queue_head_t timer_wq;
+ wait_queue_head_t delete_wq;
+
+ struct xsvhba_conn ctrl_conn;
+ struct xsvhba_conn data_conn;
+ struct xsmp_session_info xsmp_info;
+ xsmp_cookie_t xsmp_hndl;
+
+ atomic_t ref_cnt;
+ atomic_t vhba_state;
+ atomic_t reconnect_flag;
+ u32 idr;
+ int sync_needed;
+ int reconn_try_cnt;
+ int reconn_attempt;
+ int qp_count;
+ u64 cs_timeout_count;
+ atomic_t abort_count;
+ int qp_poll_count;
+ int heartbeat_count;
+ u64 resource_id;
+ int scanned_once;
+ int scan_reqd;
+ int xg_init_done;
+ struct proc_dir_entry *admin_down_proc;
+ struct work_struct work;
+};
+
+#define TQF_ONLINE 0 /* Device online to OS */
+#define TQF_SUSPENDED 1
+#define TQF_RETRY_CMDS 2
+
+#define VHBA_ALLOC_FMR 0x40
+#define VHBA_NO_TARGET_STATE 0x200
+#define VHBA_ADMIN_DOWN_STATE 0x400
+
+#define VHBA_DATA_QP 0x1
+#define VHBA_CONTROL_QP 0x2
+#define VHBA_BOTH_QP 0x3
+
+#define VHBA_READY 0
+#define VHBA_DRAINING 1
+#define VHBA_ABORTING 2
+#define VHBA_DELETING 3
+#define VHBA_DELETED 4
+
+#define FCS_UNCONFIGURED 1
+#define FCS_DEVICE_DEAD 2
+#define FCS_DEVICE_LOST 3
+#define FCS_ONLINE 4
+#define FCS_NOT_SUPPORTED 5
+
+struct os_lun {
+ struct fc_lun *fclun; /* FC LUN context pointer */
+ u32 lun_id;
+
+ unsigned long q_flag;
+
+ u_long q_timeout; /* total command timeouts */
+ atomic_t q_timer; /* suspend timer */
+ u32 q_count; /* current count */
+ u32 q_max; /* maximum count lun can be suspended */
+ u8 q_state; /* lun State */
+
+ u_long io_cnt; /* total xfer count since boot */
+ u_long out_cnt; /* total outstanding IO count */
+ u_long w_cnt; /* total writes */
+ u_long r_cnt; /* total reads */
+ u_long avg_time; /* */
+};
+
+struct os_tgt {
+ /* LUN context pointer */
+ struct os_lun *olun[MAX_FIBRE_LUNS_MORE];
+ struct fc_port *fcport;
+ unsigned long flags;
+ struct scsi_xg_vhba_host *ha;
+
+ /* Persistent binding information */
+ union port_id d_id;
+ u8 node_name[WWN_SIZE];
+ u8 port_name[WWN_SIZE];
+ u8 init_done;
+ atomic_t ncmds;
+ u16 state;
+};
+
+#define FCF_TAPE_PRESENT BIT_0
+struct fc_port {
+ struct list_head list;
+ struct list_head fcluns;
+
+ u8 node_name[WWN_SIZE];
+ u8 port_name[WWN_SIZE];
+ union port_id d_id;
+ u16 loop_id;
+
+ u8 port_type;
+
+ atomic_t state;
+ u32 flags;
+
+ struct os_tgt *tgt_queue;
+ u16 os_target_id;
+
+ u8 device_type;
+ u8 unused;
+
+ u8 bound;
+ u16 lun_count;
+
+ u8 lun_map[MAX_FIBRE_LUNS >> 3];
+ u16 lun_ids[MAX_FIBRE_LUNS];
+ u32 persistent_binding;
+
+ struct fc_rport *rport;
+ u32 supported_classes;
+
+};
+
+struct fc_lun {
+ struct list_head list;
+
+ u16 lun;
+ atomic_t state;
+ u8 device_type;
+
+ u8 max_path_retries;
+ u32 flags;
+};
+
+#define TGT_Q(ha, t) (ha->otgt[t])
+#define LUN_Q(ha, t, l) (TGT_Q(ha, t)->olun[l])
+#define GET_LU_Q(ha, t, l) \
+ ((TGT_Q(ha, t) != NULL) ? TGT_Q(ha, t)->olun[l] : NULL)
+
+extern struct virtual_hba vhba_g;
+extern struct idr vhba_idr_table;
+extern rwlock_t vhba_global_lock;
+extern u32 vhba_current_idr;
+extern atomic_t vhba_count;
+extern struct workqueue_struct *vhba_workqueuep;
+
+#define MAX_LUNS 0xffff
+
+#endif /* __VHBA_OSDEF_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+
+int limit = PAGE_SIZE;
+
+int force_sp_copy;
+
+struct proc_dir_entry *proc_root_vhba;
+struct proc_dir_entry *proc_root_vhba_dev;
+struct proc_dir_entry *proc_root_vhba_targ;
+
+int vhba_print_io_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_ib_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_xsmp_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fmr_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fc_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_scsi_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+
+static int vhba_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int vhba_proc_open_debug(struct inode *inode, struct file *file);
+static int vhba_proc_read_force_copy(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_force_copy(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int vhba_proc_open_force_copy(struct inode *inode, struct file *file);
+static int vhba_proc_read_device(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_device(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int vhba_proc_open_device(struct inode *inode, struct file *file);
+static int vhba_proc_read_target(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_target(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int vhba_proc_open_target(struct inode *inode, struct file *file);
+static int vhba_proc_read_san_info(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_san_info(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int vhba_proc_open_san_info(struct inode *inode, struct file *file);
+
+static const struct file_operations vhba_debug_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = vhba_proc_open_debug,
+ .read = seq_read,
+ .write = vhba_proc_write_debug,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations vhba_force_copy_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = vhba_proc_open_force_copy,
+ .read = seq_read,
+ .write = vhba_proc_write_force_copy,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations vhba_device_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = vhba_proc_open_device,
+ .read = seq_read,
+ .write = vhba_proc_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations vhba_target_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = vhba_proc_open_target,
+ .read = seq_read,
+ .write = vhba_proc_write_target,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations vhba_san_info_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = vhba_proc_open_san_info,
+ .read = seq_read,
+ .write = vhba_proc_write_san_info,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int vhba_proc_read_device(struct seq_file *m, void *data)
+{
+ char qp_sts_str[][64] = { "VHBA_QP_NOTCONNECTED",
+ "VHBA_QP_TRYCONNECTING",
+ "VHBA_QP_RECONNECTING",
+ "VHBA_QP_PARTIAL_CONNECT",
+ "VHBA_QP_CONNECTED",
+ "ERROR"
+ };
+
+ char vhba_sts_str[][64] = {
+ "VHBA_STATE_NOT_ACTIVE",
+ "VHBA_STATE_ACTIVE",
+ "VHBA_STATE_SCAN",
+ "VHBA_STATE_BUSY",
+ "ERROR"
+ };
+
+ struct virtual_hba *vhba;
+ int *pint;
+ struct scsi_xg_vhba_host *ha;
+ struct vhba_xsmp_msg *cfg;
+ int link_state;
+ u64 wwn;
+
+ vhba = (struct virtual_hba *)
+ vhba_get_context_by_idr((u32) (unsigned long)m->private);
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+ return 0;
+ }
+ ha = vhba->ha;
+ if (atomic_read(&ha->vhba_flags) != VHBA_READY) {
+ dprintk(TRC_PROC, vhba, "VHBA not in ready state to\n"
+ " display valid information!\n");
+ goto out;
+ }
+ cfg = vhba->cfg;
+
+ seq_puts(m, "VHBA Information\n");
+ seq_puts(m, "----------------\n");
+ seq_printf(m, "Symbolic Name\t\t\t: %s\n", (char *)(cfg->vh_name));
+ seq_printf(m, "Chassis Name\t\t\t: %s\n", vhba->xsmp_info.chassis_name);
+ seq_printf(m, "Chassis Version\t\t\t: %x\n", vhba->xsmp_info.version);
+ seq_printf(m, "Server-Profile Name\t\t: %s\n",
+ vhba->xsmp_info.session_name);
+
+ seq_printf(m, "Bootable\t\t\t: %s\n",
+ (vhba->cfg->vhba_flag & 0x1) ? "Yes" : "No");
+ seq_printf(m,
+ "VHBA state\t\t\t: %s\n",
+ vhba_sts_str[atomic_read(&vhba->vhba_state)]);
+ seq_puts(m, "Link State\t\t\t: ");
+ link_state = atomic_read(&ha->link_state);
+ switch (link_state) {
+ case 0:
+ seq_puts(m, "LINK_DOWN\n");
+ break;
+ case 1:
+ seq_puts(m, "LINK_UP\n");
+ break;
+ case 2:
+ seq_puts(m, "LINK_DEAD\n");
+ break;
+ default:
+ seq_puts(m, "UNKNOWN\n");
+ }
+ seq_puts(m, "IB Status\t\t\t: ");
+ switch (atomic_read(&ha->ib_status)) {
+ case 0:
+ seq_puts(m, "IB_UP\n");
+ break;
+ case 1:
+ seq_puts(m, "IB_DOWN\n");
+ break;
+ case 2:
+ seq_puts(m, "IB_DEAD\n");
+ break;
+ default:
+ seq_puts(m, "UNKNOWN\n");
+ }
+ seq_printf(m, "Reconnect Attempts\t\t: %d\n",
+ (int)vhba->reconn_attempt);
+ seq_printf(m, "Cumulative QP Count\t\t: %d\n", (int)vhba->qp_count);
+ seq_printf(m, "Lun masking\t\t\t: %s\n",
+ vhba->cfg->lunmask_enable ? "Enabled" : "Disabled");
+ seq_printf(m, "Host Number\t\t\t: %u\n", (unsigned)ha->host_no);
+ seq_printf(m, "Target count\t\t\t: %llu\n", (u64) ha->target_count);
+ wwn = (u64) (vhba->cfg)->wwn;
+ seq_puts(m, "Port WWN\t\t\t:\n");
+ seq_printf(m,
+ "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+ (u8) (wwn & 0xff), (u8) ((wwn >> 8) & 0xff),
+ (u8) ((wwn >> 16) & 0xff), (u8) ((wwn >> 24) & 0xff),
+ (u8) ((wwn >> 32) & 0xff), (u8) ((wwn >> 40) & 0xff),
+ (u8) ((wwn >> 48) & 0xff), (u8) ((wwn >> 56) & 0xff));
+
+ seq_printf(m, "Scan Required\t\t\t: %d\n", vhba->scan_reqd);
+ seq_printf(m, "SCSI Max Retry count\t\t: %d\n", vhba_max_scsi_retry);
+ seq_puts(m, "\n");
+
+ vhba_print_xsmp_stats(m, ha);
+
+ seq_puts(m, "\n");
+ seq_puts(m, "VHBA Infiniband Information\n");
+ seq_puts(m, "---------------------------\n");
+ seq_printf(m, "Remote IB LID\t\t\t: 0x%x\n", be16_to_cpu(cfg->tca_lid));
+ pint = (int *)&cfg->tca_guid;
+ seq_printf(m, "Remote IB GUID\t\t\t: 0x%x%x\n",
+ be32_to_cpu(*pint), be32_to_cpu(*(pint + 1)));
+ seq_printf(m, "Resource ID\t\t\t: 0x%Lx\n", cfg->resource_id);
+ seq_printf(m, "CQP handle/qpn\t\t\t: 0x%x/%u\n",
+ ha->control_qp_handle, ha->control_qpn);
+ seq_printf(m, "DQP handle/qpn\t\t\t: 0x%x/%u\n",
+ ha->data_qp_handle, ha->data_qpn);
+ seq_printf(m, "QP status\t\t\t: %s\n",
+ qp_sts_str[atomic_read(&ha->qp_status)]);
+ seq_printf(m, "Driver ref count\t\t: %d\n",
+ atomic_read(&vhba->ref_cnt));
+ seq_puts(m, "\n");
+
+ vhba_print_ib_stats(m, ha);
+ seq_puts(m, "\n");
+
+ vhba_print_io_stats(m, ha);
+ seq_puts(m, "\n");
+
+ /*
+ * XXX: this all needs to go into different stats proc files.
+ * The vmkernel helpers don't do multipage returns, so each
+ * /proc entry can only be less than 4K (3K?).
+ */
+ vhba_print_fmr_stats(m, ha);
+ seq_puts(m, "\n");
+
+ vhba_print_fc_stats(m, ha);
+ seq_puts(m, "\n");
+
+ vhba_print_scsi_stats(m, ha);
+ seq_puts(m, "\n");
+out:
+ DEC_REF_CNT(vhba);
+ return 0;
+}
+
+static ssize_t vhba_proc_write_device(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ struct virtual_hba *vhba;
+ int newval = 0;
+ void *data = PDE_DATA(file_inode(file));
+ int ret;
+
+ vhba = (struct virtual_hba *)
+ vhba_get_context_by_idr((u32) (unsigned long)data);
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+ return count;
+ }
+ ret = kstrtoint_from_user(buffer, count, 0, &newval);
+ if (ret < 0) {
+ DEC_REF_CNT(vhba);
+ return ret;
+ }
+ vhba->scan_reqd = 0;
+ DEC_REF_CNT(vhba);
+ return count;
+}
+
+static int vhba_proc_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, vhba_proc_read_device, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_target(struct seq_file *m, void *data)
+{
+ struct virtual_hba *vhba;
+ int tgt, k;
+ struct scsi_xg_vhba_host *ha;
+ struct os_tgt *tq;
+
+ vhba = (struct virtual_hba *)
+ vhba_get_context_by_idr((u32) (unsigned long)m->private);
+
+ if (vhba == NULL) {
+ dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+ return 0;
+ }
+
+ ha = vhba->ha;
+ if (atomic_read(&ha->vhba_flags) != VHBA_READY) {
+ dprintk(TRC_PROC, NULL,
+ "VHBA not in ready state to display valid information!\n");
+ goto out;
+ }
+
+ seq_puts(m, "VHBA Target Information\n");
+ seq_puts(m, "-----------------------\n\n");
+ seq_printf(m, "Host no\t\t\t\t: %u\n", (unsigned)ha->host_no);
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (!tq || (atomic_read(&tq->fcport->state) != FCS_ONLINE))
+ continue;
+ seq_puts(m, "Target WWPN\t\t\t: ");
+ for (k = 0; k < WWN_SIZE; k++) {
+ seq_printf(m, "%02x ", tq->port_name[k]);
+ }
+ seq_printf(m, "\nFC Port id\t\t\t: 0x%x\n", tq->d_id.b24);
+ seq_printf(m, "Bound\t\t\t\t: %d\n", tq->fcport->bound);
+ seq_printf(m, "ncmds\t\t\t\t: %d\n", atomic_read(&tq->ncmds));
+ seq_printf(m, "Lun Count\t\t\t: %d\n", tq->fcport->lun_count);
+ seq_printf(m, "N-Port Handle\t\t\t: 0x%x\n",
+ tq->fcport->loop_id);
+ seq_printf(m, "Map Order\t\t\t: %d\n",
+ tq->fcport->os_target_id);
+
+ seq_puts(m, "Lun id(s)\t\t\t:");
+ for (k = 0; k < tq->fcport->lun_count; k++) {
+ if (k != 0) {
+ seq_puts(m, ",");
+ }
+ seq_printf(m, " %d", tq->fcport->lun_ids[k]);
+ }
+ seq_puts(m, "\n\n");
+ seq_puts(m, "-------------------------\n\n");
+ }
+out:
+ DEC_REF_CNT(vhba);
+ return 0;
+}
+static ssize_t vhba_proc_write_target(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ /* Writes are ignored; consume the data so callers don't retry */
+ return count;
+}
+
+static int vhba_proc_open_target(struct inode *inode, struct file *file)
+{
+ return single_open(file, vhba_proc_read_target, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_san_info(struct seq_file *m, void *data)
+{
+ int j;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ read_lock(&vhba_global_lock);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ ha = vhba->ha;
+ dprintk(TRC_PROC, NULL,
+ "Mount count = %d\tBoot count = %d\n",
+ ha->mount_count, ha->boot_count);
+ for (j = 0; j < ha->boot_count; j++)
+ seq_printf(m, "sanboot%d=%s:%d:%d\n",
+ j, ha->sanboot[j].vh_name,
+ ha->sanboot[j].tgt_num, ha->sanboot[j].lun);
+ seq_puts(m, "\n\n");
+ for (j = 0; j < ha->mount_count; j++)
+ seq_printf(m, "sanmount%d=%s:%d:%d\n",
+ j, ha->sanmount[j].vh_name,
+ ha->sanmount[j].tgt_num,
+ ha->sanmount[j].lun);
+ if (ha->mount_type == 1)
+ seq_printf(m, "sanmount%d=lvm:%s:%s\n",
+ j, ha->host_lvm_info.logical_vol_group,
+ ha->host_lvm_info.logical_vol);
+ else if (ha->mount_type == 2)
+ seq_printf(m, "sanmount=%s\n", ha->direct_mount_dev);
+ if (ha->mount_options[0])
+ seq_printf(m, "mount-opts:%s:%s\n",
+ (char *)vhba->cfg->vh_name,
+ ha->mount_options);
+ }
+ read_unlock(&vhba_global_lock);
+
+ return 0;
+}
+
+static ssize_t vhba_proc_write_san_info(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ /* Writes are ignored; consume the data so callers don't retry */
+ return count;
+}
+
+static int vhba_proc_open_san_info(struct inode *inode, struct file *file)
+{
+ return single_open(file, vhba_proc_read_san_info, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_debug(struct seq_file *m, void *data)
+{
+ seq_printf(m, "Total wait time(secs): %ld\n", (vhba_wait_time / HZ));
+ seq_printf(m, "Debug bitmask: 0x%x\n", vhba_debug);
+ return 0;
+}
+
+static ssize_t vhba_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ ssize_t ret = -EINVAL;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ if (count >= PAGE_SIZE || copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ if (kstrtoint(buf, 0, &vhba_debug) == 0)
+ ret = count;
+out:
+ free_page((unsigned long)buf);
+ return ret;
+}
+
+static int vhba_proc_open_debug(struct inode *inode, struct file *file)
+{
+ return single_open(file, vhba_proc_read_debug, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_force_copy(struct seq_file *m, void *data)
+{
+ seq_printf(m, "%d\n", force_sp_copy);
+ return 0;
+}
+
+static ssize_t vhba_proc_write_force_copy(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ int newval;
+ ssize_t ret = -EINVAL;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ if (count >= PAGE_SIZE || copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ /* Sanity check: only 0 or 1 are accepted */
+ if (kstrtoint(buf, 0, &newval) == 0 && (newval == 0 || newval == 1)) {
+ force_sp_copy = newval;
+ ret = count;
+ }
+out:
+ free_page((unsigned long)buf);
+ return ret;
+}
+
+static int vhba_proc_open_force_copy(struct inode *inode, struct file *file)
+{
+ return single_open(file, vhba_proc_read_force_copy, PDE_DATA(inode));
+}
+
+int vhba_add_proc_entry(struct virtual_hba *vhba)
+{
+ struct proc_dir_entry *file;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ char name[35];
+
+ sprintf(name, "%s.%Lx", (char *)vhba->cfg->vh_name, vhba->resource_id);
+ file = proc_create_data((char *)name, S_IFREG, proc_root_vhba_dev,
+ &vhba_device_proc_fops,
+ (void *)(unsigned long)vhba->idr);
+ if (!file) {
+ eprintk(vhba, "Unable to create/proc entry\n");
+ return -1;
+ }
+ ha->vhba_proc = file;
+
+ return 0;
+}
+
+void vhba_remove_proc_entry(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ char name[35];
+
+ if (ha->vhba_proc) {
+ sprintf(name, "%s.%Lx", (char *)vhba->cfg->vh_name,
+ vhba->resource_id);
+ remove_proc_entry((char *)name, proc_root_vhba_dev);
+ ha->vhba_proc = NULL;
+ }
+}
+
+int vhba_add_target_proc_entry(struct virtual_hba *vhba)
+{
+ struct proc_dir_entry *file;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ int ret = 0;
+ char name[35];
+
+ sprintf(name, "%s.%Lx", (char *)vhba->cfg->vh_name, vhba->resource_id);
+ file = proc_create_data((char *)name, S_IFREG, proc_root_vhba_targ,
+ &vhba_target_proc_fops,
+ (void *)(unsigned long)vhba->idr);
+ if (!file) {
+ eprintk(vhba, "Unable to create/proc entry\n");
+ ret = -1;
+ goto add_target_proc_end;
+ }
+
+ ha->vhba_proc_target = file;
+
+add_target_proc_end:
+ return ret;
+}
+
+int vhba_remove_target_proc_entry(struct virtual_hba *vhba)
+{
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+ char name[35];
+
+ if (ha->vhba_proc_target) {
+ sprintf(name, "%s.%Lx", (char *)vhba->cfg->vh_name,
+ vhba->resource_id);
+ remove_proc_entry((char *)name, proc_root_vhba_targ);
+ ha->vhba_proc_target = NULL;
+ }
+ return 0;
+}
+
+int vhba_create_procfs_root_entries(void)
+{
+ struct proc_dir_entry *debug_file = NULL;
+ struct proc_dir_entry *force_copy_file = NULL;
+ struct proc_dir_entry *san_info = NULL;
+
+ proc_root_vhba = proc_root_vhba_dev = NULL;
+
+ proc_root_vhba = proc_mkdir("driver/xsvhba", NULL);
+ if (!proc_root_vhba) {
+ eprintk(NULL, "Unable to create /proc/driver/xsvhba\n");
+ return -1;
+ } else {
+ debug_file = proc_create_data("debug", S_IFREG, proc_root_vhba,
+ &vhba_debug_proc_fops, NULL);
+
+ force_copy_file =
+ proc_create_data("force_copy", S_IFREG, proc_root_vhba,
+ &vhba_force_copy_proc_fops, NULL);
+
+ san_info = proc_create_data("san-info", S_IFREG, proc_root_vhba,
+ &vhba_san_info_proc_fops, NULL);
+
+ proc_root_vhba_dev = proc_mkdir("devices", proc_root_vhba);
+ proc_root_vhba_targ = proc_mkdir("target_info", proc_root_vhba);
+ }
+
+ return 0;
+}
+
+void vhba_remove_procfs_root_entries(void)
+{
+ dprintk(TRC_PROC, NULL, "removing target_info proc entry\n");
+ if (proc_root_vhba_targ)
+ remove_proc_entry("target_info", proc_root_vhba);
+
+ dprintk(TRC_PROC, NULL, "removing devices proc entry\n");
+ if (proc_root_vhba_dev)
+ remove_proc_entry("devices", proc_root_vhba);
+
+ dprintk(TRC_PROC, NULL, "removing debug proc entry\n");
+ if (proc_root_vhba_dev)
+ remove_proc_entry("debug", proc_root_vhba);
+
+ dprintk(TRC_PROC, NULL, "removing san-info proc entry\n");
+ if (proc_root_vhba_dev)
+ remove_proc_entry("san-info", proc_root_vhba);
+
+ dprintk(TRC_PROC, NULL, "removing force copy proc entry\n");
+ if (proc_root_vhba_dev)
+ remove_proc_entry("force_copy", proc_root_vhba);
+
+ dprintk(TRC_PROC, NULL, "removing vhba proc entry\n");
+ if (proc_root_vhba)
+ remove_proc_entry("driver/xsvhba", NULL);
+}
+
+int vhba_print_io_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+
+ seq_puts(m, "VHBA I/O Statistics\n");
+ seq_puts(m, "-------------------\n");
+ seq_printf(m, "Read I/O Reqs\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_read_reqs);
+ seq_printf(m, "Write I/O Reqs\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_write_reqs);
+ seq_printf(m, "Task Mgmt Reqs\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_task_mgmt_reqs);
+ seq_printf(m, "CS_TIMEOUT Count\t\t: %llu\n",
+ (u64) ha->vhba->cs_timeout_count);
+ seq_printf(m, "Abort Count\t\t\t: %llu\n",
+ (u64) atomic_read(&ha->vhba->abort_count));
+ seq_printf(m, "Total I/O Rsps\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_io_rsp);
+ seq_printf(m, "Total copy I/Os\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_copy_ios);
+ seq_printf(m, "Total copy page allocs\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_copy_page_allocs);
+ seq_printf(m, "Total copy page frees\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_copy_page_frees);
+ for (i = 0; i < VHBA_MAX_VH_Q_COUNT; i++) {
+ seq_printf(m, "Pending reqs for VH queue-%-2d\t: %llu\n", i,
+ (u64) atomic_read(&ha->stats.io_stats.
+ num_vh_q_reqs[i]));
+ }
+
+ seq_printf(m, "Curr outstding cmd\t\t: %llu\n",
+ (u64) ha->current_outstanding_cmd);
+
+ seq_printf(m, "Bytes Read\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_read_mbytes);
+ seq_printf(m, "Bytes Written\t\t\t: %llu\n",
+ (u64) ha->stats.io_stats.total_write_mbytes);
+
+ seq_printf(m, "Queue cmd busy return count\t: %llu\n",
+ (u64) ha->stats.io_stats.qcmd_busy_ret_cnt);
+
+ return 0;
+}
+
+int vhba_print_ib_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+ struct ib_cntr {
+ char name[32];
+ u64 *cntr;
+ } ib_cntrs[] = {
+ { "CQP down", &ha->stats.ib_stats.cqp_dn_cnt },
+ { "CQP up", &ha->stats.ib_stats.cqp_up_cnt },
+ { "CQP send error", &ha->stats.ib_stats.cqp_send_err_cnt },
+ { "CQP receive error", &ha->stats.ib_stats.cqp_recv_err_cnt },
+ { "CQP remote disconnect", &ha->stats.ib_stats.cqp_remote_disconn_cnt },
+ { "DQP down", &ha->stats.ib_stats.dqp_dn_cnt },
+ { "DQP up", &ha->stats.ib_stats.dqp_up_cnt },
+ { "DQP send error", &ha->stats.ib_stats.dqp_send_err_cnt },
+ { "DQP receive error", &ha->stats.ib_stats.dqp_recv_err_cnt },
+ { "DQP remote disconnect", &ha->stats.ib_stats.dqp_remote_disconn_cnt },
+ { "Current outstanding reqs", &ha->stats.ib_stats.curr_outstanding_reqs },
+ { "Request queue full", &ha->stats.ib_stats.total_req_q_fulls },
+ { "Outstanding queue wraps", &ha->stats.ib_stats.total_outstding_q_wraps },
+ };
+
+ seq_puts(m, "VHBA IB Statistics\n");
+ seq_puts(m, "------------------\n");
+ for (i = 0; i < 13; i++) {
+ seq_printf(m, "%-24s\t: %llu\n",
+ ib_cntrs[i].name, (u64) *(ib_cntrs[i].cntr));
+ }
+ return 0;
+}
+
+int vhba_print_xsmp_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+ struct xsmp_cntr {
+ char name[32];
+ u64 *cntr;
+ } xsmp_cntrs[] = {
+ { "install", &vhba_xsmp_stats.install_msg_cnt },
+ { "delete", &vhba_xsmp_stats.delete_msg_cnt },
+ { "update", &vhba_xsmp_stats.update_msg_cnt },
+ { "stats config", &vhba_xsmp_stats.cfg_stats_msg_cnt },
+ { "stats clear", &vhba_xsmp_stats.clr_stats_msg_cnt },
+ { "sync begin", &vhba_xsmp_stats.sync_begin_msg_cnt },
+ { "sync end", &vhba_xsmp_stats.sync_end_msg_cnt },
+ { "oper req", &vhba_xsmp_stats.oper_req_msg_cnt },
+ { "unknown xsmp", &vhba_xsmp_stats.unknown_msg_cnt },
+ { "xt state down", &vhba_xsmp_stats.xt_state_dn_cnt },
+ { "tca lid change", &vhba_xsmp_stats.tca_lid_changed_cnt },
+ { "abort all", &vhba_xsmp_stats.abort_all_cnt },
+ };
+
+ seq_puts(m, "VHBA XSMP Statistics\n");
+ seq_puts(m, "--------------------\n");
+ for (i = 0; i < 12; i++) {
+ seq_printf(m, "%-20s\t\t: %llu\n",
+ xsmp_cntrs[i].name, (u64) *(xsmp_cntrs[i].cntr));
+ }
+ seq_printf(m, "Last unknown xsmp msg\t\t: %llu\n",
+ (u64) vhba_xsmp_stats.last_unknown_msg);
+ seq_printf(m, "Last known xsmp msg\t\t: %llu\n",
+ (u64) vhba_xsmp_stats.last_msg);
+ return 0;
+}
+
+int vhba_print_fmr_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+ struct fmr_cntr {
+ char name[32];
+ u64 *cntr;
+ } fmr_cntrs[] = {
+ { "FMR successful map", &ha->stats.fmr_stats.map_cnt },
+ { "FMR unmap", &ha->stats.fmr_stats.unmap_cnt },
+ { "FMR map fail", &ha->stats.fmr_stats.map_fail_cnt },
+ { "Unaligned i/o", &ha->stats.fmr_stats.unaligned_io_cnt },
+ { "Unaligned sg list ptr", &ha->stats.fmr_stats.unaligned_ptr_cnt },
+ { "FMR i/o", &ha->stats.fmr_stats.total_fmr_ios },
+ };
+
+ seq_puts(m, "VHBA FMR Statistics\n");
+ seq_puts(m, "-------------------\n");
+ for (i = 0; i < 6; i++) {
+ seq_printf(m, "%-24s\t: %llu\n",
+ fmr_cntrs[i].name, (u64) *(fmr_cntrs[i].cntr));
+ }
+ return 0;
+}
+
+int vhba_print_fc_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+ struct fc_cntr {
+ char name[32];
+ u64 *cntr;
+ } fc_cntrs[] = {
+ { "FC link down", &ha->stats.fc_stats.link_dn_cnt },
+ { "FC link dead", &ha->stats.fc_stats.link_dead_cnt },
+ { "FC link up", &ha->stats.fc_stats.link_up_cnt },
+ { "Target online RSCN", &ha->stats.fc_stats.rscn_up_cnt },
+ { "Target offline RSCN", &ha->stats.fc_stats.rscn_dn_cnt },
+ { "Target dead RSCN", &ha->stats.fc_stats.rscn_dead_cnt },
+ { "Dup RSCN for online tgt", &ha->stats.fc_stats.rscn_multiple_up_cnt },
+ { "Dup RSCN for offline tgt", &ha->stats.fc_stats.rscn_multiple_dn_cnt },
+ { "Last online target", &ha->stats.fc_stats.last_up_tgt },
+ { "Last dead target", &ha->stats.fc_stats.last_dead_tgt },
+ { "Last offline target", &ha->stats.fc_stats.last_dn_tgt },
+ { "Disc info msg received", &ha->stats.fc_stats.disc_info_cnt },
+ { "Enable resp msg received", &ha->stats.fc_stats.enable_resp_cnt },
+ { "Enable msg sent", &ha->stats.fc_stats.enable_msg_cnt },
+ };
+
+ seq_puts(m, "VHBA FC Statistics\n");
+ seq_puts(m, "------------------\n");
+ for (i = 0; i < 14; i++) {
+ seq_printf(m, "%-24s\t: %llu\n",
+ fc_cntrs[i].name, (u64) *(fc_cntrs[i].cntr));
+ }
+ return 0;
+}
+
+int vhba_print_scsi_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+ int i;
+ struct scsi_cntr {
+ char name[32];
+ u64 *cntr;
+ } scsi_cntrs[] = {
+ { "Invalid target", &ha->stats.scsi_stats.invalid_tgt_cnt },
+ { "Invalid lun", &ha->stats.scsi_stats.invalid_lun_cnt },
+ { "Successful abort", &ha->stats.scsi_stats.abort_success_cnt },
+ { "Failed abort", &ha->stats.scsi_stats.abort_fail_cnt },
+ { "Successful device reset", &ha->stats.scsi_stats.dev_reset_success_cnt },
+ { "Failed device reset", &ha->stats.scsi_stats.dev_reset_fail_cnt },
+ { "Successful bus reset", &ha->stats.scsi_stats.bus_reset_success_cnt },
+ { "Failed bus reset", &ha->stats.scsi_stats.bus_reset_fail_cnt },
+ { "Successful host reset", &ha->stats.scsi_stats.host_reset_success_cnt },
+ { "Failed host reset", &ha->stats.scsi_stats.host_reset_fail_cnt },
+ };
+
+ seq_puts(m, "VHBA SCSI Statistics\n");
+ seq_puts(m, "--------------------\n");
+ for (i = 0; i < 10; i++) {
+ seq_printf(m, "%-24s\t: %llu\n", scsi_cntrs[i].name,
+ (u64) *(scsi_cntrs[i].cntr));
+ }
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_tcq.h>
+
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_align.h"
+#include "vhba_scsi_intf.h"
+
+#define XG_VHBA_VERSION "1.0.0"
+#define VHBA_ABORT_TIMEOUT 5
+#define VHBA_RESET_TIMEOUT 10
+
+static int xg_vhba_slave_configure(struct scsi_device *device);
+static int xg_vhba_eh_abort(struct scsi_cmnd *);
+static int xg_vhba_eh_device_reset(struct scsi_cmnd *);
+static int xg_vhba_eh_bus_reset(struct scsi_cmnd *);
+static int xg_vhba_eh_host_reset(struct scsi_cmnd *);
+
+struct info_str {
+ char *buffer;
+ int length;
+ off_t offset;
+ int pos;
+};
+
+static int xg_vhba_slave_configure(struct scsi_device *device)
+{
+ dprintk(TRC_FUNCS, NULL, "Entering...\n");
+
+ scsi_change_queue_depth(device, vhba_max_q_depth);
+
+ dprintk(TRC_FUNCS, NULL, "Returning\n");
+ return 0;
+}
+
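+/*
+ * Per-command timer callback, armed in queuecommand via sp->timer.
+ * Runs in timer (softirq) context, so only io_lock is taken here; the
+ * command is completed back with DID_ABORT and its outstanding-array
+ * slot is released.
+ */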
+void xsigo_cmd_timeout(struct srb *sp)
+{
+ struct scsi_xg_vhba_host *ha;
+ struct scsi_cmnd *cmd;
+ struct virtual_hba *vhba;
+ int iocb_handle;
+ unsigned long flags = 0;
+
+ cmd = sp->cmd;
+ ha = sp->ha;
+ vhba = ha->vhba;
+
+ dprintk(TRC_SCSI_ERRS, vhba, "I/O timeout\n");
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+
+ atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[sp->queue_num]);
+
+ cmd->result = DID_ABORT << 16;
+ iocb_handle = sp->iocb_handle;
+ if (ha->outstanding_cmds[iocb_handle]) {
+ ha->outstanding_cmds[iocb_handle] = NULL;
+ CMD_SP(sp->cmd) = NULL;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ complete_cmd_and_callback(vhba, sp, sp->cmd);
+ DEC_REF_CNT(vhba);
+ } else {
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ }
+}
+
+static int xg_vhba_queuecommand_lck(struct scsi_cmnd *cmd,
+ void (*fn)(struct scsi_cmnd *))
+{
+ struct virtual_hba *vhba;
+ struct Scsi_Host *host;
+ struct scsi_xg_vhba_host *ha;
+ u32 t, l;
+ struct srb *sp;
+ struct os_tgt *tq;
+ struct os_lun *lq;
+ unsigned long flags = 0;
+ int index = -1;
+ u32 queue_num = 0;
+ u32 curr_position = 0;
+ int vhba_state;
+ int lun_map_byte, lun_map_bit;
+ int vv, lindex = -1;
+ struct srb *xg_sp;
+ int found = 0;
+ u32 timeout_val;
+
+ host = cmd->device->host;
+ cmd->scsi_done = fn;
+
+ if (!host) {
+ dprintk(TRC_SCSI_ERRS, NULL,
+ "host ptr is null in queuecommand\n");
+ return SCSI_MLQUEUE_HOST_BUSY;
+ }
+ vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+ if (vhba == NULL) {
+ cmd->result = DID_NO_CONNECT << 16;
+ if (cmd->scsi_done)
+ (cmd->scsi_done) (cmd);
+ else
+ dprintk(TRC_SCSI_ERRS, NULL, "scsi_done is null\n");
+ return 0;
+ }
+ ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+ vhba_state = atomic_read(&vhba->vhba_state);
+ if ((vhba_state != VHBA_STATE_ACTIVE) &&
+ (vhba_state != VHBA_STATE_SCAN)) {
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "Error - vhba not active! returning DID_NO_CONNECT\n");
+ cmd->result = DID_NO_CONNECT << 16;
+ (cmd->scsi_done) (cmd);
+ DEC_REF_CNT(vhba);
+ return 0;
+ }
+
+/* Only use this define when you are doing an obj/opt build in VMware */
+#ifdef VMX86_DEVEL
+ if (atomic_read(&vhba->ref_cnt) <= 0)
+ panic("Refcount went negative\n");
+#endif
+
+ if ((atomic_read(&ha->ib_status) == VHBA_IB_DEAD) ||
+ (atomic_read(&ha->ib_status) == VHBA_IB_DOWN)) {
+ cmd->result = DID_NO_CONNECT << 16;
+ if (cmd->scsi_done) {
+ (cmd->scsi_done) (cmd);
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "returning DID_NO_CONNECT as QP is down\n");
+ }
+ DEC_REF_CNT(vhba);
+ return 0;
+ }
+
+ t = cmd->device->id;
+ l = cmd->device->lun;
+ dprintk(TRC_IO, vhba, "recvd tgt %d, lun %d\n", t, l);
+
+ if (l >= ha->max_luns) {
+ ha->stats.scsi_stats.invalid_lun_cnt++;
+ cmd->result = DID_NO_CONNECT << 16;
+ dprintk(TRC_SCSI_ERRS, vhba, "Invalid lun %d max luns %d\n",
+ l, ha->max_luns);
+ goto release_return;
+ }
+
+ if (t >= ha->max_tgt_id) {
+ ha->stats.scsi_stats.invalid_tgt_cnt++;
+ if (ha->max_tgt_id != 0) {
+ cmd->result = DID_BAD_TARGET << 16;
+ dprintk(TRC_INFO, vhba, "Invalid target %d, ", t);
+ dprintk(TRC_INFO, vhba, "target cnt %d\n", ha->max_tgt_id);
+ } else {
+ cmd->result = DID_NO_CONNECT << 16;
+ }
+ goto release_return;
+ }
+
+ if (vhba_multiple_q)
+ queue_num = t % VHBA_MAX_VH_Q_COUNT;
+ else
+ queue_num = 0;
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+
+ if (atomic_read(&ha->stats.io_stats.num_vh_q_reqs[queue_num])
+ >= vhba_max_q_depth) {
+ atomic_inc(&ha->stats.io_stats.vh_q_full_cnt[queue_num]);
+ /*
+ * Queue is full. If we have a command with ABORTING
+ * status pending in the outstanding array for this target,
+ * then in all likelihood the iocard/VH is hosed. Take the recovery
+ * action and disconnect the QP.
+ */
+
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ if (vhba_recovery_action(ha, t)) {
+ cmd->result = DID_NO_CONNECT << 16;
+ cmd->scsi_done(cmd);
+ DEC_REF_CNT(vhba);
+ return 0;
+ }
+ ha->stats.io_stats.qcmd_busy_ret_cnt++;
+ DEC_REF_CNT(vhba);
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+ }
+
+ atomic_inc(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+
+ lun_map_byte = l / 8;
+ lun_map_bit = l % 8;
+ tq = TGT_Q(ha, t);
+ if (tq) {
+ if (tq->init_done == 0) {
+ dprintk(TRC_IO, vhba,
+ "setting dma alignment to %ld for tgt %d\n",
+ PAGE_SIZE, t);
+ blk_queue_dma_alignment(cmd->device->request_queue,
+ (PAGE_SIZE - 1));
+ tq->init_done = 1;
+ }
+ if (!(vhba->cfg->lunmask_enable))
+ goto no_lun_mask;
+
+ if (l < MAX_FIBRE_LUNS) {
+ for (vv = 0; vv < tq->fcport->lun_count; vv++) {
+ if (l == tq->fcport->lun_ids[vv]) {
+ lindex = vv;
+ found = 1;
+ break;
+ }
+ }
+ } else
+ found = 1;
+
+ if (lindex >= 0)
+ dprintk(TRC_IO, vhba, "l=%d, lun_ids=%d,",
+ l, tq->fcport->lun_ids[lindex]);
+ dprintk(TRC_INFO, vhba, "cmd=%02x\n", cmd->cmnd[0]);
+
+ if (found == 0) {
+ if (l == 0) {
+ if (cmd->cmnd[0] == INQUIRY) {
+ struct scatterlist *sg;
+ char *buf;
+
+ cmd->result = DID_OK << 16;
+ if (scsi_sg_count(cmd)) {
+ unsigned int sg_offset;
+
+ sg = scsi_sglist(cmd);
+ sg_offset = SG_OFFSET(sg);
+
+ buf = page_address(sg_page(sg))
+ + sg_offset;
+
+ *buf = 0x7f;
+ *(buf + 2) = 0x03;
+ *(buf + 3) = 0x22;
+ *(buf + 4) = 0x00;
+ } else if (scsi_bufflen(cmd)) {
+ buf = (u8 *) scsi_sglist(cmd);
+ *buf = 0x7f;
+ *(buf + 2) = 0x03;
+ *(buf + 3) = 0x22;
+ *(buf + 4) = 0x00;
+ }
+ dprintk(TRC_IO, vhba, "Mask LUN 0\n");
+ spin_unlock_irqrestore(&ha->io_lock,
+ flags);
+ goto dec_release_return;
+ }
+ } else {
+ ha->stats.scsi_stats.invalid_lun_cnt++;
+ cmd->result = DID_NO_CONNECT << 16;
+ dprintk(TRC_SCSI_ERRS, vhba, "(LUN ID) Error");
+ dprintk(TRC_SCSI_ERRS, vhba, "lun %d ", l);
+ dprintk(TRC_SCSI_ERRS, vhba, "not found in ");
+ dprintk(TRC_SCSI_ERRS, vhba, "target queue!\n");
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto dec_release_return;
+ }
+ }
+no_lun_mask:
+ lq = LUN_Q(ha, t, l);
+ if (!(lq)) {
+ lq = vhba_allocate_lun(vhba, t, l);
+ if (lq)
+ lq->fclun = kmalloc(sizeof(struct fc_lun),
+ GFP_ATOMIC);
+ if (!lq || !lq->fclun) {
+ cmd->result = DID_NO_CONNECT << 16;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto dec_release_return;
+ }
+ memset(lq->fclun, 0, sizeof(struct fc_lun));
+ lq->fclun->lun = l;
+ }
+
+ dprintk(TRC_IO, vhba, "mapped tgt %d" " lun %d\n", t, l);
+ } else {
+ ha->stats.scsi_stats.invalid_tgt_cnt++;
+ cmd->result = DID_NO_CONNECT << 16;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto dec_release_return;
+ }
+
+ /* Maximum SCSI I/O retry */
+ if (cmd->allowed < vhba_max_scsi_retry)
+ cmd->allowed = vhba_max_scsi_retry;
+
+ if (atomic_read(&ha->link_state) == LINK_DEAD ||
+ atomic_read(&tq->fcport->state) == FCS_DEVICE_DEAD) {
+ cmd->result = DID_NO_CONNECT << 16;
+
+ dprintk(TRC_TIMER, vhba, "Error - link/tgt dead!\n");
+ dprintk(TRC_TIMER, vhba, "Link state %d device state %d\n",
+ atomic_read(&ha->link_state),
+ atomic_read(&tq->fcport->state));
+
+ dprintk(TRC_TIMER, vhba, "sp(%p) cmd:(%p)", CMD_SP(cmd), cmd);
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto dec_release_return;
+ }
+
+ if (vhba->cfg->lunmask_enable) {
+ /* Report lun interception */
+ if ((cmd->cmnd[0] == REPORT_LUNS) &&
+ (atomic_read(&ha->link_state) == LINK_UP) &&
+ (atomic_read(&tq->fcport->state) == FCS_ONLINE)) {
+ /*
+ * Just decrement the ha reference counter right away
+ * as the command is not going to be sent to the
+ * chip anyway.
+ */
+ atomic_dec(&ha->stats.io_stats.
+ num_vh_q_reqs[queue_num]);
+ xg_sp = kmalloc(sizeof(struct srb), GFP_ATOMIC);
+ if (xg_sp == NULL) {
+ cmd->result = DID_ERROR << 16;
+ eprintk(vhba, "Error - allocate SRB failed\n");
+ goto release_return;
+ }
+ memset(xg_sp, 0, sizeof(struct srb));
+ xg_sp->cmd = cmd;
+ xg_sp->ha = ha;
+ CMD_SP(cmd) = (void *)xg_sp;
+ xg_sp->state = 0;
+ xg_sp->abort_cnt = 0;
+
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ if (vhba_report_luns_cmd(xg_sp, t, l)) {
+ kfree(xg_sp);
+ goto release_return;
+ } else {
+
+ cmd->result = DID_OK << 16;
+
+ if (xg_sp->cmd) {
+ if (xg_sp->cmd->scsi_done)
+ (*(xg_sp->cmd)->scsi_done)
+ (xg_sp->cmd);
+ }
+ kfree(xg_sp);
+ /*
+ * Decrement vhba ref cnt, since the cmd
+ * is not going down.
+ */
+ DEC_REF_CNT(vhba);
+ goto ret_success;
+ }
+ }
+ }
+ index = get_outstding_cmd_entry(vhba);
+ if (index == -1) {
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ dprintk(TRC_SCSI_ERRS, vhba,
+ "Warn - Max limit on outstanding commands reached.\n");
+ dprintk(TRC_SCSI_ERRS, vhba, "returning SCSI_MLQUEUE_HOST_BUSY\n");
+ atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+ ha->stats.io_stats.qcmd_busy_ret_cnt++;
+ DEC_REF_CNT(vhba);
+ return SCSI_MLQUEUE_HOST_BUSY;
+
+ }
+
+ ha->outstanding_cmds[ha->current_outstanding_cmd] =
+ kmalloc(sizeof(struct srb), GFP_ATOMIC);
+ if (ha->outstanding_cmds[ha->current_outstanding_cmd] == NULL) {
+ cmd->result = DID_ERROR << 16;
+ eprintk(vhba, "Error - allocate SRB failed\n");
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto dec_release_return;
+ }
+
+ sp = ha->outstanding_cmds[ha->current_outstanding_cmd];
+ memset(sp, 0, sizeof(struct srb));
+ sp->cmd = cmd;
+ sp->ha = ha;
+ CMD_SP(cmd) = (void *)sp;
+ sp->state = 0;
+ sp->tgt_queue = tq;
+ sp->lun_queue = lq;
+ sp->error_flag = 0;
+ sp->abort_cnt = 0;
+ sp->unaligned_sg = NULL;
+
+ sp->queue_num = queue_num;
+
+ if (tq->fcport->flags & FCF_TAPE_PRESENT)
+ sp->flags |= SRB_TAPE;
+
+ /*
+ * Check for processor IRQ affinity or for few outstanding I/Os;
+ * otherwise the IRQ path can pick up and submit the I/O.
+ */
+
+ curr_position = ha->current_outstanding_cmd++;
+ if (ha->current_outstanding_cmd == MAX_OUTSTANDING_COMMANDS)
+ ha->current_outstanding_cmd = 0;
+
+ if ((timeout_per_command(cmd) / HZ) <= IB_CMD_TIMEOUT_DELTA)
+ timeout_val = vhba_default_scsi_timeout;
+ else
+ timeout_val = timeout_per_command(cmd) / HZ;
+
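+ /*
+ * Arm the driver's own command timer IB_CMD_TIMEOUT_DELTA seconds
+ * before the effective SCSI timeout so that xsigo_cmd_timeout()
+ * can reclaim the IOCB slot before the midlayer error handler
+ * kicks in.
+ */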
+ sp->timer.expires = jiffies + (timeout_val - IB_CMD_TIMEOUT_DELTA) * HZ;
+ init_timer(&sp->timer);
+ sp->timer.data = (unsigned long)sp;
+ sp->timer.function = (void (*)(unsigned long))xsigo_cmd_timeout;
+ add_timer(&sp->timer);
+
+ /* Prepare the IOCB, the handle, build IOCB and fire it off */
+ dprintk(TRC_IO, vhba,
+ "calling start scsi for sp %p t %d l %d\n", sp, t, (u32) l);
+
+ if (vhba_start_scsi(sp, t, l, curr_position)) {
+ dprintk(TRC_INFO, vhba,
+ "vhba_start_scsi failed sp=%p cmd=%p\n", sp, sp->cmd);
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ }
+ if (ha->outstanding_cmds[curr_position]) {
+ CMD_SP(sp->cmd) = NULL;
+ kfree(ha->outstanding_cmds[curr_position]);
+ ha->outstanding_cmds[curr_position] = NULL;
+ } else {
+ /* Cmd got flushed asynchronously */
+ dprintk(TRC_INFO, vhba,
+ "Cmd Got flushed Asynchronously");
+ dprintk(TRC_INFO, vhba, " sp=%p cmd=%p\n", sp, sp->cmd);
+ DEC_REF_CNT(vhba);
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ cmd->result = DID_BUS_BUSY << 16;
+ goto dec_release_return;
+ }
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ret_success:
+ dprintk(TRC_FUNCS, vhba, "Returning SUCCESS\n");
+ return 0;
+
+dec_release_return:
+ atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+
+release_return:
+ dprintk(TRC_SCSI_ERRS, vhba, "returning cmd status %d from qcmd\n",
+ (int)((cmd->result) >> 16));
+ (cmd->scsi_done) (cmd);
+
+ DEC_REF_CNT(vhba);
+ return 0;
+}
+
+/*
+ * As of 2.6.37 queuecommand is lock-less and its prototype has
+ * changed. To provide backward compatibility, Linux supplies a
+ * macro (DEF_SCSI_QCMD) that calls queuecommand with host_lock
+ * held. We use that macro so the behavior matches kernels before
+ * 2.6.37.
+ * Please see Documentation/scsi/scsi_mid_low_api.txt in the Linux
+ * kernel tree and the following URL for the discussion on
+ * lock-less queuecommand:
+ * http://www.spinics.net/lists/linux-scsi/msg48200.html
+ */
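+/*
+ * Roughly, DEF_SCSI_QCMD(fn) generates
+ *   int fn(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
+ * which takes shost->host_lock with IRQs disabled and then calls
+ * fn_lck(cmd, cmd->scsi_done); that is why the real work lives in
+ * xg_vhba_queuecommand_lck() above.
+ */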
+
+#if !defined(DEF_SCSI_QCMD)
+
+#define xg_vhba_queuecommand xg_vhba_queuecommand_lck
+
+#else
+
+DEF_SCSI_QCMD(xg_vhba_queuecommand)
+#endif
+struct scsi_host_template xg_vhba_driver_template = {
+ .module = THIS_MODULE,
+ .name = "xsvhba",
+ .proc_name = "xsvhba",
+ .queuecommand = xg_vhba_queuecommand,
+ .eh_abort_handler = xg_vhba_eh_abort,
+ .eh_device_reset_handler = xg_vhba_eh_device_reset,
+ .eh_bus_reset_handler = xg_vhba_eh_bus_reset,
+ .eh_host_reset_handler = xg_vhba_eh_host_reset,
+ .slave_configure = xg_vhba_slave_configure,
+#ifdef CONFIG_SCSI_QLA2xxx_FAILOVER
+ .ioctl = xg_vhba_ioctl,
+#endif
+ .this_id = -1,
+ .cmd_per_lun = 1,
+ .use_clustering = ENABLE_CLUSTERING,
+/* Xsigo limit is 6 */
+ .sg_tablesize = 1,
+/* 512 secs * 512 bytes = VH limit (256 KB) */
+ .max_sectors = VHBA_DEFAULT_TRANSFER_SIZE,
+ .use_blk_tags = 1,
+};
+
+void sp_put(struct virtual_hba *vhba, struct srb *sp)
+{
+ if ((sp->cmd) && (sp->cmd->scsi_done))
+ (*(sp->cmd)->scsi_done) (sp->cmd);
+ kfree(sp);
+}
+
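+/*
+ * SCSI EH abort handler: look the command up in the outstanding array,
+ * send an abort IOCB to the IOP and, on success (or when the QP is
+ * already disconnected), complete the command back with DID_ABORT.
+ * Repeated abort failures while the IB link is up trigger a full
+ * purge and QP reconnect.
+ */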
+static int xg_vhba_eh_abort(struct scsi_cmnd *cmd)
+{
+ struct virtual_hba *vhba;
+ struct srb *sp, *sp1;
+ unsigned int b, t, l;
+ struct scsi_xg_vhba_host *ha = NULL;
+ unsigned long flags = 0;
+ int iocb_handle = 0;
+ int i, ret = FAILED;
+
+ vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+ if (vhba == NULL) {
+ dprintk(TRC_ERRORS, NULL,
+ "Could not find vhba for this command\n");
+ return FAILED;
+ }
+ ha = vhba->ha;
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+
+ sp = (struct srb *)CMD_SP(cmd);
+
+ if (sp == NULL) {
+ dprintk(TRC_INFO, vhba, "cmd already done cmd=%p\n", cmd);
+ ret = FAILED;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto out;
+ }
+
+ /* Generate LU queue on bus, target, LUN */
+ b = cmd->device->channel;
+ t = cmd->device->id;
+ l = cmd->device->lun;
+
+ /*
+ * Print the type of command and size of the IO being aborted.
+ */
+ dprintk(TRC_INFO, vhba,
+ "Abort cmd called for sp=%p, cmd=%p,", sp, cmd);
+ dprintk(TRC_INFO, vhba, " opcode/len = 0x%x/0x%x\n",
+ cmd->cmnd[0], scsi_bufflen(cmd));
+
+ atomic_inc(&vhba->abort_count);
+
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+ sp1 = ha->outstanding_cmds[i];
+ if (sp1 == NULL)
+ continue;
+ if (sp1->cmd == cmd) {
+ /*
+ * We found the command. sp1 must be same as sp, if
+ * not, we have a duplicate command in the list, and
+ * we should fail this abort.
+ */
+ if (sp1 != sp) {
+ dprintk(TRC_INFO, vhba,
+ "Duplicate cmd in Outstanding array: ");
+ dprintk(TRC_INFO, vhba, "sp=%p, cmd=%p,sp1=%p",
+ sp, cmd, sp1);
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ ret = FAILED;
+ goto out;
+ }
+ break;
+ }
+ }
+ /*
+ * If IOP did not respond to the first abort and it
+ * failed through this routine, it is possible that the IOP
+ * never got a chance to look at the abort and the command
+ * about to be aborted crossed paths with the abort failure.
+ * In that case, mark the second attempt to abort this command
+ * as success.
+ */
+ if ((sp->state == VHBA_IO_STATE_ABORTED) ||
+ (sp->state == VHBA_IO_STATE_ABORT_NEEDED)) {
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto success;
+ }
+
+ if (i == MAX_OUTSTANDING_COMMANDS) {
+ if (atomic_read(&ha->ib_status) == VHBA_IB_DEAD) {
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ ret = FAILED;
+ goto out;
+ }
+ dprintk(TRC_INFO, vhba,
+ "Failing Abort(): can't find sp:0x%p, ", sp);
+ dprintk(TRC_INFO, vhba, "cmd:0x%p sp->cmd:0x%p", cmd, sp->cmd);
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ ret = FAILED;
+ goto out;
+ }
+
+ sp->state = VHBA_IO_STATE_ABORTING;
+ iocb_handle = sp->iocb_handle;
+
+ /*
+ * It may take up to 30 seconds for a target to transition from
+ * LOST to ONLINE/DEAD state. Aborts will continue to fail during
+ * that time. Allow that much time before starting recovery.
+ */
+
+ if (((sp->abort_cnt)++ > vhba_abort_recovery_count) &&
+ (atomic_read(&ha->ib_status) == VHBA_IB_UP)) {
+ /*
+ * We are stuck in ABORT loop due to IOP/agent being stuck
+ * Purge all pending IOs and disconnect/reconnect QP
+ */
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ dprintk(TRC_INFO, vhba,
+ "Abort failed %d times", vhba_abort_recovery_count);
+ dprintk(TRC_INFO, vhba, "initiating recovery action\n");
+ atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+ vhba_purge_pending_ios(vhba);
+ /*
+ * Let the Work Queue thread disconnect the Q pair.
+ */
+ atomic_set((&ha->qp_status), VHBA_QP_TRYCONNECTING);
+ ret = FAILED;
+ goto out;
+
+ }
+
+ ret = vhba_send_abort(vhba, iocb_handle, t);
+ if (ret) {
+ /*
+ * If QP is disconnected, complete the abort
+ */
+ if (ret == VHBA_QP_DISCONNECTED) {
+ if (ha->outstanding_cmds[iocb_handle]) {
+ ha->outstanding_cmds[iocb_handle] = NULL;
+ atomic_dec(&ha->stats.
+ io_stats.num_vh_q_reqs[sp->
+ queue_num]);
+ goto success;
+ } else {
+ dprintk(TRC_INFO, vhba,
+ "cmd completed while we were in abort()");
+ dprintk(TRC_INFO, vhba, "cmd = %p sp->cmd = %p",
+ cmd, sp->cmd);
+ ret = FAILED;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto out;
+ }
+ }
+ ha->stats.scsi_stats.abort_fail_cnt++;
+ dprintk(TRC_INFO, vhba, "Error - send abort failed %d\n", ret);
+ ret = FAILED;
+ sp->state = VHBA_IO_STATE_ACTIVE;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto out;
+ }
+ if (sp->state == VHBA_IO_STATE_ABORTING) {
+ ret = FAILED;
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ sp->timer.data = (unsigned long)NULL;
+ }
+ sp->state = VHBA_IO_STATE_ABORT_FAILED;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ goto out;
+ }
+
+success:
+ ha->stats.scsi_stats.abort_success_cnt++;
+ if (sp->timer.function != NULL) {
+ del_timer(&sp->timer);
+ sp->timer.function = NULL;
+ sp->timer.data = (unsigned long)NULL;
+ }
+ sp->cmd->result = DID_ABORT << 16;
+ /*
+ * Reacquire the iocb handle and clear the
+ * outstanding array entry.
+ */
+
+ iocb_handle = sp->iocb_handle;
+ if (ha->outstanding_cmds[iocb_handle])
+ ha->outstanding_cmds[iocb_handle] = NULL;
+ CMD_SP(sp->cmd) = NULL;
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ complete_cmd_and_callback(vhba, sp, sp->cmd);
+
+ /*
+ * Decrement Ref count for the original command
+ */
+ DEC_REF_CNT(vhba);
+ ret = SUCCESS;
+ dprintk(TRC_INFO, vhba,
+ "Abort Success for sp=%p, cmd=%p, ", sp, cmd);
+ dprintk(TRC_INFO, vhba, "sp->cmd=%p\n", sp->cmd);
+out:
+ DEC_REF_CNT(vhba);
+ return ret;
+}
+
+static int xg_vhba_eh_device_reset(struct scsi_cmnd *cmd)
+{
+ struct virtual_hba *vhba;
+ unsigned int b, t, l;
+ struct scsi_xg_vhba_host *ha = NULL;
+ int ret = FAILED;
+
+ vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+ if (vhba == NULL) {
+ dprintk(TRC_ERR_RECOV, NULL,
+ "Could not find vhba for this command\n");
+ return FAILED;
+ }
+
+ ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+ /* Generate LU queue on bus, target, LUN */
+ b = cmd->device->channel;
+ t = cmd->device->id;
+ l = cmd->device->lun;
+
+ dprintk(TRC_INFO, vhba,
+ "Device Reset called for cmd=%p ", cmd);
+ dprintk(TRC_INFO, vhba, "tgt=%d, lun=%d\n", t, l);
+
+ dprintk(TRC_INFO, vhba, "TGT reset:tgt=%d\n", t);
+ ret = vhba_send_tgt_reset(vhba, t);
+ if (ret && ret != VHBA_QP_DISCONNECTED) {
+ ha->stats.scsi_stats.dev_reset_fail_cnt++;
+ dprintk(TRC_INFO, vhba, "Error - send failed\n");
+ ret = FAILED;
+ goto out;
+ } else
+ vhba_taskmgmt_flush_ios(vhba, cmd->device->id, -1, 0);
+
+ ret = SUCCESS;
+ ha->stats.scsi_stats.dev_reset_success_cnt++;
+ dprintk(TRC_INFO, vhba, "Device Reset Successful!\n");
+out:
+ DEC_REF_CNT(vhba);
+ return ret;
+}
+
+static int xg_vhba_eh_bus_reset(struct scsi_cmnd *cmd)
+{
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+ if (vhba == NULL) {
+ eprintk(NULL, "Could not find vhba for this command\n");
+ return FAILED;
+ }
+
+ dprintk(TRC_INFO, vhba, "Bus reset called\n");
+
+ ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+ vhba_ib_disconnect_qp(vhba);
+ vhba_purge_pending_ios(vhba);
+
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+
+ ha->stats.scsi_stats.bus_reset_success_cnt++;
+ dprintk(TRC_INFO, vhba, "Bus Reset Successful\n");
+
+ DEC_REF_CNT(vhba);
+ return SUCCESS;
+}
+
+static int xg_vhba_eh_host_reset(struct scsi_cmnd *cmd)
+{
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+
+ vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+ if (vhba == NULL) {
+ eprintk(NULL, "Could not find vhba for this command\n");
+ return FAILED;
+ }
+
+ dprintk(TRC_INFO, vhba, "Host Reset Called\n");
+
+ ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+ vhba_ib_disconnect_qp(vhba);
+ vhba_purge_pending_ios(vhba);
+
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+
+ ha->stats.scsi_stats.host_reset_success_cnt++;
+ dprintk(TRC_INFO, vhba, "Host Reset Successful\n");
+
+ DEC_REF_CNT(vhba);
+ return SUCCESS;
+}
+
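+/*
+ * Legacy /proc/scsi read support: copy_mem_info()/copy_info() append
+ * formatted text to the caller-supplied buffer while honoring the
+ * requested offset and length; xg_vhba_proc_info() uses them to report
+ * the driver banner and version.
+ */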
+void copy_mem_info(struct info_str *info, char *data, int len)
+{
+ dprintk(TRC_FUNCS, NULL, "Entering\n");
+
+ if (info->pos + len > info->offset + info->length)
+ len = info->offset + info->length - info->pos;
+
+ if (info->pos + len < info->offset) {
+ info->pos += len;
+ return;
+ }
+
+ if (info->pos < info->offset) {
+ off_t partial;
+
+ partial = info->offset - info->pos;
+ data += partial;
+ info->pos += partial;
+ len -= partial;
+ }
+
+ if (len > 0) {
+ memcpy(info->buffer, data, len);
+ info->pos += len;
+ info->buffer += len;
+ }
+ dprintk(TRC_FUNCS, NULL, "Returning\n");
+}
+
+static int copy_info(struct info_str *info, char *fmt, ...)
+{
+ va_list args;
+ char buf[256];
+ int len;
+
+ va_start(args, fmt);
+ len = vsprintf(buf, fmt, args);
+ va_end(args);
+
+ copy_mem_info(info, buf, len);
+ return len;
+}
+
+int xg_vhba_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
+ off_t offset, int length, int inout)
+{
+ struct virtual_hba *vhba = NULL;
+ struct info_str info;
+ struct scsi_xg_vhba_host *ha;
+ int retval;
+
+ vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+ if (vhba == NULL)
+ return 0;
+ ha = vhba->ha;
+
+ if (inout) {
+ DEC_REF_CNT(vhba);
+ return length;
+ }
+
+ if (start)
+ *start = buffer;
+
+ info.buffer = buffer;
+ info.length = length;
+ info.offset = offset;
+ info.pos = 0;
+
+ /* start building the print buffer */
+ copy_info(&info, "Xsigo Virtual Host Adapter\n");
+ copy_info(&info, "Driver version %s\n", XG_VHBA_VERSION);
+
+ retval = info.pos > info.offset ? info.pos - info.offset : 0;
+
+ dprintk(TRC_PROC, vhba,
+ "Exiting proc_info: info.pos=%d,", info.pos);
+ dprintk(TRC_INFO, vhba, "offset=0x%lx, length=0x%x\n", offset, length);
+ DEC_REF_CNT(vhba);
+ return retval;
+}
+
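+/*
+ * Called from queuecommand when a virtual-HBA queue is full: if every
+ * outstanding command for the target is stuck in ABORTING state
+ * (VHBA_MAX_VH_Q_DEPTH of them), the IOP/VH is assumed wedged, pending
+ * I/Os are purged and the work queue is left to reconnect the QP.
+ * Returns 1 when this recovery action was taken.
+ */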
+int vhba_recovery_action(struct scsi_xg_vhba_host *ha, u32 t)
+{
+ struct os_tgt *tq;
+ struct srb *sp;
+ struct virtual_hba *vhba = ha->vhba;
+ unsigned long flags = 0;
+ int i, count = 0;
+ int rval = 0;
+
+ tq = TGT_Q(ha, t);
+
+ spin_lock_irqsave(&ha->io_lock, flags);
+ for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+ if (ha->outstanding_cmds[i]) {
+ sp = ha->outstanding_cmds[i];
+ if ((sp->tgt_queue == tq) &&
+ (sp->state == VHBA_IO_STATE_ABORTING)) {
+ count++;
+ }
+ }
+
+ }
+ spin_unlock_irqrestore(&ha->io_lock, flags);
+ if (count == VHBA_MAX_VH_Q_DEPTH) {
+ /*
+ * We found all the commands stuck in ABORTING state and the
+ * queue is full. Flush the defer list and purge all pending IOs.
+ */
+ dprintk(TRC_INFO, vhba,
+ "Command queue is stuck with aborts.");
+ dprintk(TRC_INFO, vhba, " Take recovery actions.\n");
+
+ atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+
+ vhba_purge_pending_ios(vhba);
+
+ /*
+ * Let the Work Queue thread disconnect the Q pair.
+ */
+
+ atomic_set((&ha->qp_status), VHBA_QP_TRYCONNECTING);
+
+ rval = 1;
+ }
+
+ return rval;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_SCSI_INTF_H__
+#define __VHBA_SCSI_INTF_H__
+
+extern int vhba_max_q_depth;
+extern int vhba_max_scsi_retry;
+extern int vhba_default_scsi_timeout;
+
+int vhba_purge_pending_ios(struct virtual_hba *vhba);
+void vhba_taskmgmt_flush_ios(struct virtual_hba *vhba, int tgt_id, int lun,
+ int lun_reset_flag);
+int vhba_send_abort(struct virtual_hba *vhba, int abort_handle, int t);
+int vhba_send_lun_reset(struct virtual_hba *vhba, int t, int l);
+int vhba_send_tgt_reset(struct virtual_hba *vhba, int t);
+void complete_cmd_and_callback(struct virtual_hba *vhba, struct srb *sp,
+ struct scsi_cmnd *cp);
+int vhba_start_scsi(struct srb *sp, u32 t, u32 l, u32 handle);
+int vhba_report_luns_cmd(struct srb *sp, u32 t, u32 l);
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba);
+int vhba_purge_pending_ios(struct virtual_hba *vhba);
+struct os_lun *vhba_allocate_lun(struct virtual_hba *vhba, u32 tgt, u32 lun);
+int get_outstding_cmd_entry(struct virtual_hba *vhba);
+
+void vhba_set_tgt_count(struct virtual_hba *vhba);
+void vhba_mark_tgts_lost(struct virtual_hba *vhba);
+int vhba_set_all_tgts_offline(struct virtual_hba *vhba);
+
+int xg_vhba_start_scsi(void);
+void xg_vhba_free_device(struct virtual_hba *);
+extern int vhba_scsi_release(struct virtual_hba *vhba);
+void dump_iocb(struct cmd_type_7 *cmd_pkt);
+extern int xg_vhba_proc_info(struct Scsi_Host *shost,
+ char *buffer, char **start, off_t offset,
+ int length, int inout);
+extern int vhba_recovery_action(struct scsi_xg_vhba_host *, u32);
+extern void vhba_workqueue_processor(struct work_struct *work);
+extern int vhba_check_heart_beat(struct virtual_hba *vhba);
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+
+struct timer_list stats_timer;
+u32 stats_timer_on;
+
+void vhba_stats_clear_all(struct vhba_ha_stats *pstats)
+{
+ if (pstats == NULL) {
+ dprintk(0, NULL, "NULL stats pointer passed\n");
+ return;
+ }
+ memset(pstats, 0, sizeof(struct vhba_ha_stats));
+}
+
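+/*
+ * XSMP statistics request handler, run from the driver work queue.
+ * action == 1 clears the per-vhba counters; any other action fills the
+ * reply in place from ha->stats and acks it back over XSMP.
+ */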
+void vhba_xsmp_stats_req(struct work_struct *work)
+{
+ struct xsvhba_work *xwork =
+ container_of(work, struct xsvhba_work, work);
+ struct _vhba_stats *msg = (struct _vhba_stats *)xwork->msg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha = NULL;
+ struct vhba_ha_stats *pstats = NULL;
+
+ vhba = vhba_get_context_by_resource_id(msg->vid);
+
+ if (vhba == NULL)
+ goto out;
+
+ ha = vhba->ha;
+ pstats = &ha->stats;
+
+ if (msg->action == 1) {
+ dprintk(TRC_STATS, NULL, "received clear stats\n");
+ vhba_stats_clear_all(pstats);
+ DEC_REF_CNT(vhba);
+ goto out;
+ } else {
+ dprintk(TRC_STATS, NULL,
+ "received get stats action %d\n", msg->action);
+ msg->totalio = ha->stats.io_stats.total_read_reqs +
+ ha->stats.io_stats.total_write_reqs +
+ ha->stats.io_stats.total_task_mgmt_reqs;
+ msg->readbytecount = ha->stats.io_stats.total_read_mbytes;
+ msg->writebytecount = ha->stats.io_stats.total_write_mbytes;
+ msg->outstandingrequestcount = 0;
+ msg->iorequestcount = msg->totalio;
+ msg->readrequestcount = ha->stats.io_stats.total_read_reqs;
+ msg->writerequestcount = ha->stats.io_stats.total_write_reqs;
+ msg->taskmanagementrequestcount =
+ ha->stats.io_stats.total_task_mgmt_reqs;
+ msg->targetcount = ha->target_count;
+ msg->luncount = ha->lun_count;
+
+ /* this is cumulative and not per vhba */
+ msg->xsmpxtdowncount = vhba_xsmp_stats.xt_state_dn_cnt;
+
+ /* this is also cumulative */
+ msg->xsmpxtoperstaterequestcount =
+ vhba_xsmp_stats.oper_req_msg_cnt;
+ msg->mapfmrcount = ha->stats.fmr_stats.map_cnt;
+ msg->ummapfmrcount = ha->stats.fmr_stats.unmap_cnt;
+ msg->usedmapfmrcount = msg->mapfmrcount - msg->ummapfmrcount;
+ msg->abortcommandcount =
+ ha->stats.scsi_stats.abort_success_cnt +
+ ha->stats.scsi_stats.abort_fail_cnt;
+ msg->resetluncommandcount = 0;
+ msg->resettargetcommandcount =
+ ha->stats.scsi_stats.dev_reset_success_cnt +
+ ha->stats.scsi_stats.dev_reset_fail_cnt;
+ msg->resetbuscommandcount =
+ ha->stats.scsi_stats.bus_reset_success_cnt +
+ ha->stats.scsi_stats.bus_reset_fail_cnt;
+ msg->linkdowncount = ha->stats.fc_stats.link_dn_cnt;
+ msg->discinfoupdatecount = ha->stats.fc_stats.disc_info_cnt;
+ msg->targetlostcount = ha->stats.fc_stats.rscn_dn_cnt +
+ ha->stats.fc_stats.rscn_multiple_dn_cnt;
+ msg->targetfoundcount = ha->stats.fc_stats.rscn_up_cnt +
+ ha->stats.fc_stats.rscn_multiple_up_cnt;
+ msg->cqpdisconnectcount = ha->stats.ib_stats.cqp_dn_cnt;
+ msg->dqpdisconnectcount = ha->stats.ib_stats.dqp_dn_cnt;
+ msg->cqpibsenterrorcount = ha->stats.ib_stats.cqp_send_err_cnt;
+ msg->dqpibsenterrorcount = ha->stats.ib_stats.dqp_send_err_cnt;
+ msg->cqpibreceiveerrorcount =
+ ha->stats.ib_stats.cqp_recv_err_cnt;
+ msg->dqpibreceiverrrorcount =
+ ha->stats.ib_stats.dqp_recv_err_cnt;
+ msg->cqpibremotedisconnecterrorcount = 0;
+ msg->dqpibremotedisconnecterrorcount = 0;
+ }
+ msg->code = 0;
+ vhba_xsmp_ack(vhba->xsmp_hndl, (u8 *) msg, sizeof(struct _vhba_stats));
+ DEC_REF_CNT(vhba);
+out:
+ kfree(xwork->msg);
+ kfree(xwork);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/fs.h>
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_xsmp.h"
+#include "vhba_scsi_intf.h"
+
+#define VHBA_WORKQUEUE "xsvhba_wq"
+#define VHBA_MAX_DEL_TRY 3
+#define VHBA_MAX_TEAR_DOWN_TRY 3
+
+struct delayed_work vhba_main_work;
+struct workqueue_struct *vhba_workqueuep;
+struct reconn_sts {
+ int idr;
+ int cqp_hdl;
+ int dqp_hdl;
+};
+struct reconn_sts reconn_st[MAX_VHBAS];
+
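+/*
+ * Periodic housekeeping, re-armed every WQ_PERIODIC_TIMER seconds by
+ * vhbawq_queue(): poll each vhba for a dead IB link, a stuck reconnect
+ * or a lost QP context on the IOP, then tear down and reconnect the
+ * QPs that were marked for recovery.
+ */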
+void vhba_internal_processing(void)
+{
+ int i = 0;
+ int reconn_count = 0;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ unsigned long flags;
+
+ read_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ int got_handle = 0;
+
+ ha = vhba->ha;
+
+ /* Check IB is dead or not */
+ ib_link_dead_poll(ha);
+
+ if (atomic_read(&ha->qp_status) == VHBA_QP_RECONNECTING)
+ vhba->qp_poll_count++;
+ else
+ vhba->qp_poll_count = 0;
+
+ /*
+ * If we are stuck in VHBA_QP_RECONNECTING for 60+ seconds,
+ * let us try to force reconnect
+ */
+ if (vhba->qp_poll_count >= 12) {
+ reconn_st[i].idr = vhba->idr;
+ i++;
+ goto reconnect;
+ }
+
+ /*
+ * Check if IOP lost the QP context. Send a heartbeat
+ * to revive it.
+ */
+
+ if (atomic_read(&vhba->ha->ib_status) == VHBA_IB_UP) {
+ if (vhba_check_heart_beat(vhba))
+ vhba->heartbeat_count++;
+ else
+ vhba->heartbeat_count = 0;
+ }
+
+ if (vhba->heartbeat_count >= 12) {
+ dprintk(TRC_WQ, vhba,
+ "Sending heartbeat for QP context recovery\n");
+ (void)vhba_send_heart_beat(vhba);
+ vhba->heartbeat_count = 0;
+ }
+
+ if (atomic_read(&ha->qp_status) == VHBA_QP_TRYCONNECTING) {
+ if (vhba->reconn_try_cnt < VHBA_MAX_TEAR_DOWN_TRY) {
+ vhba->reconn_try_cnt++;
+ continue;
+ }
+ vhba->reconn_attempt++;
+ dprintk(TRC_WQ, vhba,
+ "QP Marked for reconnect: idr=%d\n", vhba->idr);
+ reconn_st[i].idr = vhba->idr;
+ got_handle = 1;
+ i++;
+ }
+
+ if (!got_handle)
+ continue;
+
+reconnect:
+ vhba->reconn_try_cnt = 0;
+ reconn_count++;
+ }
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+
+ for (i = 0; i < reconn_count; i++) {
+ vhba = vhba_get_context_by_idr(reconn_st[i].idr);
+ if (vhba == NULL) {
+ dprintk(TRC_WQ, NULL, "No matching vhba for idr=%d\n",
+ reconn_st[i].idr);
+ continue;
+ }
+ ha = vhba->ha;
+ vhba_xsmp_notify(vhba->xsmp_hndl,
+ vhba->resource_id, XSMP_VHBA_OPER_DOWN);
+ vhba_ib_disconnect_qp(vhba);
+
+ vhba_purge_pending_ios(vhba);
+
+ dprintk(TRC_INFO, vhba, "Trying to reconnect QP\n");
+ vhba_ib_connect_qp(vhba);
+ DEC_REF_CNT(vhba);
+ }
+}
+
+int vhbawq_init(void)
+{
+ vhba_workqueuep = create_singlethread_workqueue(VHBA_WORKQUEUE);
+ if (vhba_workqueuep == NULL)
+ return -1;
+
+ return 0;
+}
+
+int vhbawq_cleanup(void)
+{
+ cancel_delayed_work(&vhba_main_work);
+ flush_workqueue(vhba_workqueuep);
+ destroy_workqueue(vhba_workqueuep);
+ return 0;
+}
+
+int vhbawq_queue(void)
+{
+ INIT_DELAYED_WORK(&vhba_main_work, vhba_workqueue_processor);
+ queue_delayed_work(vhba_workqueuep, &vhba_main_work,
+ WQ_PERIODIC_TIMER * HZ);
+ return 0;
+}
+
+void vhba_workqueue_processor(struct work_struct *work)
+{
+ vhba_internal_processing();
+ vhbawq_queue();
+}
+
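+/*
+ * Returns 1 when the session looks unhealthy (a dead target, no
+ * targets discovered, or a dead link); the work queue uses this as the
+ * cue to send a heartbeat so the IOP can recover its QP context.
+ */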
+int vhba_check_heart_beat(struct virtual_hba *vhba)
+{
+ int tgt;
+ int tgt_dead = 0;
+ int ret = 0;
+ struct os_tgt *tq;
+ struct scsi_xg_vhba_host *ha = vhba->ha;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (!tq)
+ continue;
+ if (atomic_read(&tq->fcport->state) == FCS_DEVICE_DEAD) {
+ tgt_dead = 1;
+ break;
+ }
+ }
+
+ if ((tgt_dead == 1) ||
+ (vhba->ha->target_count == 0) ||
+ (atomic_read(&ha->link_state) == LINK_DEAD)) {
+ ret = 1;
+ }
+
+ return ret;
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#ifdef CONFIG_SUSE_KERNEL
+#include <linux/hardirq.h>
+#endif
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_ib.h"
+#include "xsmp_session.h"
+#include "vhba_defs.h"
+
+static int vhba_swap_bytes(int direction, u8 *msg);
+
+int vhba_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+ if (vhba_swap_bytes(H_TO_N, data)) {
+ eprintk(NULL,
+ "Error - byte order conversion gone ");
+ eprintk(NULL, "wrong! Returning -1\n");
+ return -1;
+ }
+
+ return xcpm_send_message(xsmp_hndl, vhba_xsmp_service_id, data, length);
+}
+
+int vhba_xsmp_ack(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+ int new_length = length + sizeof(struct xsmp_message_header);
+ struct xsmp_message_header *m_header;
+ u8 *msg_offset;
+ int ret;
+ u8 *msg = kmalloc(new_length, GFP_ATOMIC);
+
+ if (!msg) {
+ eprintk(NULL,
+ "Error - alloc for vhba xsmp_send_ack failed.");
+ eprintk(NULL, " Returning 1\n");
+ return 1;
+ }
+ m_header = (struct xsmp_message_header *)msg;
+ msg_offset = msg + sizeof(struct xsmp_message_header);
+
+ memset(msg, 0, sizeof(struct xsmp_message_header));
+
+ m_header->type = XSMP_MESSAGE_TYPE_VHBA;
+ m_header->length = new_length;
+ m_header->seq_number = 0;
+
+ memcpy(msg_offset, data, length);
+ /* msg freed by callee */
+ ret = vhba_xsmp_send_msg(xsmp_hndl, msg, new_length);
+
+ return ret;
+}
+
+int vhba_xsmp_nack(xsmp_cookie_t xsmp_hndl,
+ u8 *data, int length, enum vhba_xsmp_error_codes nack_code)
+{
+ int new_length = length + sizeof(struct xsmp_message_header);
+ struct xsmp_message_header *m_header;
+ u8 *msg_offset;
+ int ret = 0;
+ u8 *msg = kmalloc(new_length, GFP_ATOMIC);
+
+ if (!((nack_code > VHBA_NACK_INVALID)
+ && (nack_code < VHBA_NACK_CODE_MAX))) {
+ eprintk(NULL, "Error - invalid nack code %d\n", nack_code);
+ }
+ if (!msg) {
+ eprintk(NULL,
+ "Error - alloc for vhba xsmp_send_nack failed.");
+ eprintk(NULL, " Returning 1\n");
+ return 1;
+ }
+ m_header = (struct xsmp_message_header *)msg;
+ msg_offset = msg + sizeof(struct xsmp_message_header);
+ memset(msg, 0, sizeof(struct xsmp_message_header));
+
+ m_header->type = XSMP_MESSAGE_TYPE_VHBA;
+ m_header->length = new_length;
+ m_header->seq_number = 0;
+
+ memcpy(msg_offset, data, length);
+
+ ((struct vhba_xsmp_msg *)msg_offset)->code = nack_code | XSMP_VHBA_NACK;
+ /* msg freed by callee */
+ ret = vhba_xsmp_send_msg(xsmp_hndl, msg, new_length);
+ return ret;
+}
+
+int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id, int notifycmd)
+{
+ int length = sizeof(struct xsmp_message_header) +
+ sizeof(struct vhba_xsmp_msg);
+ int prio = (in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
+ int ret;
+ struct xsmp_message_header *header;
+ struct vhba_xsmp_msg *xsmp_msg;
+ u8 *msg = kmalloc(length, prio);
+
+ if (!msg) {
+ eprintk(NULL,
+ "Error - alloc for vhba xsmp_notify failed.");
+ eprintk(NULL, " Returning 1\n");
+ return 1;
+ }
+ header = (struct xsmp_message_header *)msg;
+ xsmp_msg = (struct vhba_xsmp_msg *)(msg + sizeof(*header));
+
+ memset(msg, 0, length);
+ header->type = XSMP_MESSAGE_TYPE_VHBA;
+ header->length = length;
+
+ xsmp_msg->type = notifycmd;
+ xsmp_msg->length = sizeof(struct vhba_xsmp_msg);
+ xsmp_msg->resource_id = resource_id;
+
+ ret = vhba_xsmp_send_msg(xsmp_hndl, msg, length);
+ if (ret) {
+ eprintk(NULL, "Error sending xsmp message %d\n", ret);
+ kfree(msg);
+ }
+ return ret;
+}
+
+static void vhba_sync_begin(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+
+ xsmp_cookie_t xsmp_hndl = xwork->xsmp_hndl;
+ struct virtual_hba *vhba;
+ unsigned long flags = 0;
+
+ read_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+ dprintk(TRC_INFO,
+ vhba, "sync begin: xsmp_hndl=%p\n", xsmp_hndl);
+ vhba->xsmp_hndl = xsmp_hndl;
+#if 0
+ /*
+ * Because of a bug on the chassis, VHBAs sometimes
+ * get deleted.
+ */
+ vhba->sync_needed = 1;
+#endif
+ }
+ }
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+static void vhba_sync_end(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+
+ xsmp_cookie_t xsmp_hndl = xwork->xsmp_hndl;
+ struct virtual_hba *vhba = NULL;
+ struct virtual_hba *tmp_vhba;
+ unsigned long flags = 0;
+
+ /* Delete all non-sync'ed VHBAs */
+ read_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+ if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+ if (vhba->sync_needed) {
+ read_unlock_irqrestore(&vhba_global_lock,
+ flags);
+ dprintk(TRC_INFO, vhba,
+ "Deleting vhba on xsmp_hndl=%p\n",
+ xsmp_hndl);
+ vhba_delete(vhba->resource_id);
+ read_lock_irqsave(&vhba_global_lock, flags);
+ }
+ }
+ }
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+ dprintk(TRC_INFO, NULL, "xsmp_hndl=%p\n", xsmp_hndl);
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+static void vhba_xsmp_handle_oper_req(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+
+ struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha;
+ int qp_state = 0;
+
+ vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+ if (vhba == NULL)
+ goto out;
+ ha = vhba->ha;
+
+ qp_state = atomic_read(&ha->qp_status);
+ if (qp_state == VHBA_QP_CONNECTED) {
+ dprintk(TRC_XSMP, NULL,
+ "SYNC: sending oper state up for vhba %p ", vhba);
+ dprintk(TRC_XSMP, NULL, "due to oper req. QP state = %d",
+ qp_state);
+ vhba_xsmp_notify(xwork->xsmp_hndl, msg->resource_id,
+ XSMP_VHBA_OPER_UP);
+ } else {
+ dprintk(TRC_XSMP, NULL,
+ "SYNC: sending oper state down for vhba %p", vhba);
+ dprintk(TRC_XSMP, NULL, " due to oper req\n");
+ vhba_xsmp_notify(xwork->xsmp_hndl, msg->resource_id,
+ XSMP_VHBA_OPER_DOWN);
+ }
+ DEC_REF_CNT(vhba);
+out:
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+void vhba_xsmp_create(struct work_struct *work)
+{
+ struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+ work);
+
+ struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+
+ dprintk(TRC_XSMP, NULL, "Vhba: Type= %d Code= %d Len= %d BMask= %d\n",
+ msg->type, msg->code, msg->length, msg->bit_mask);
+
+ dprintk(TRC_XSMP, NULL, "Vhba: TCA_Lid= %d TS= %d Res_Id= %Lx\n",
+ ntohs(msg->tca_lid), msg->tapesupport, msg->resource_id);
+
+ dprintk(TRC_XSMP, NULL, "Vhba: BW= %d AS= %d QD= %d ET= %d\n",
+ msg->bandwidth, msg->adminstate, msg->scsiqueuedepth,
+ msg->executionthrottle);
+
+ dprintk(TRC_INFO, NULL, "INSTALL received for vhba:vid %s:0x%Lx\n",
+ msg->vh_name, msg->resource_id);
+ vhba_create(xwork->xsmp_hndl, msg);
+
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
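+/*
+ * Apply a MODIFY from the XCM: an XT-down notification pushes the QP
+ * into reconnect, a TCA LID/GUID change re-targets the control and
+ * data connections, and an LDT change only updates the link-down
+ * timeout. An update carrying the LID/GUID we already have is ignored.
+ */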
+int vhba_update(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg)
+{
+ struct scsi_xg_vhba_host *ha = NULL;
+ struct virtual_hba *vhba;
+ int ret = 0;
+
+ vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+ if (vhba == NULL) {
+ dprintk(TRC_XSMP_ERRS, NULL, "vhba not found\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ha = vhba->ha;
+
+ dprintk(TRC_XSMP, vhba,
+ "xg lid %x guid %llx msg lid %x ",
+ ntohs(vhba->cfg->tca_lid), be64_to_cpu(vhba->cfg->tca_guid),
+ ntohs(msg->tca_lid));
+ dprintk(TRC_XSMP, vhba, "guid %llx %x %llx\n",
+ be64_to_cpu(msg->tca_guid), msg->tca_lid,
+ msg->tca_guid);
+
+ if (msg->bit_mask & VHBA_XT_INFO_CHANGE) {
+ dprintk(TRC_XSMP, vhba, "bit mask is %ux\n", msg->bit_mask);
+ dprintk(TRC_XSMP,
+ vhba, "xg lid %x guid %llx msg lid %x guid %llx\n",
+ ntohs(vhba->cfg->tca_lid),
+ be64_to_cpu(vhba->cfg->tca_guid),
+ ntohs(msg->tca_lid), be64_to_cpu(msg->tca_guid));
+
+ /*
+ * Handle the case where the XCM sends a vhba_update
+ * message with the same TCA GUID and LID.
+ *
+ * We ignore the message when the TCA GUID and LID are
+ * the same as the ones we have stored.
+ */
+ if ((vhba->cfg->tca_lid == msg->tca_lid) &&
+ (vhba->cfg->tca_guid == msg->tca_guid)) {
+ dprintk(TRC_XSMP_ERRS, vhba,
+ "Received identical GUID and LID\n");
+ goto out1;
+ }
+ }
+
+ if (msg->bit_mask & VHBA_XT_STATE_DOWN) {
+ dprintk(TRC_XSMP, NULL, "XT state DOWN received.\n");
+ vhba_xsmp_stats.xt_state_dn_cnt++;
+ vhba_xsmp_notify(xsmp_hndl, vhba->resource_id,
+ XSMP_VHBA_OPER_DOWN);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ } else if (msg->bit_mask & VHBA_XT_INFO_CHANGE) {
+ atomic_set(&vhba->vhba_state, VHBA_STATE_BUSY);
+ dprintk(TRC_XSMP, vhba,
+ "Received new TCA LID and GUID.");
+ dprintk(TRC_XSMP, vhba, "Reconnecting QPs with new IB info\n");
+ vhba_xsmp_stats.tca_lid_changed_cnt++;
+
+ vhba->cfg->tca_lid = msg->tca_lid;
+ vhba->cfg->tca_guid = msg->tca_guid;
+
+ vhba->ctrl_conn.ctx.dguid = be64_to_cpu(msg->tca_guid);
+ vhba->data_conn.ctx.dguid = be64_to_cpu(msg->tca_guid);
+ vhba->ctrl_conn.ctx.dlid = be16_to_cpu(msg->tca_lid);
+ vhba->data_conn.ctx.dlid = be16_to_cpu(msg->tca_lid);
+
+ vhba_purge_pending_ios(vhba);
+ atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+ } else if (msg->bit_mask & VHBA_LDT_CHANGED) {
+ dprintk(TRC_XSMP, vhba,
+ "bit mask is %08x, Update IB timer=%d\n",
+ msg->bit_mask, msg->linkdowntimeout);
+ vhba->cfg->linkdowntimeout = msg->linkdowntimeout;
+ }
+out1:
+ DEC_REF_CNT(vhba);
+out:
+ return ret;
+}
+
+void vhba_xsmp_modify(struct work_struct *work)
+{
+ struct xsvhba_work *xwork =
+ container_of(work, struct xsvhba_work, work);
+
+ struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+ int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+ int ret = 0;
+
+ ret = vhba_update(xwork->xsmp_hndl, msg);
+
+ if (!ret)
+ vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length);
+ else
+ vhba_xsmp_nack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length,
+ VHBA_NACK_GENERAL_ERROR);
+
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+static void vhba_xsmp_delete(struct work_struct *work)
+{
+ struct xsvhba_work *xwork =
+ container_of(work, struct xsvhba_work, work);
+
+ struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+ int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+ int ret = 0;
+
+ dprintk(TRC_INFO, NULL, "DELETE received for vhba:vid %s:0x%Lx\n",
+ msg->vh_name, msg->resource_id);
+ ret = vhba_delete(msg->resource_id);
+ if (ret == -EIO) {
+ dprintk(TRC_XSMP, NULL,
+ "delete failed, device busy; sending NACK\n");
+ vhba_xsmp_nack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length,
+ VHBA_NACK_DEVICE_BUSY);
+ } else {
+ vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length);
+ dprintk(TRC_XSMP, NULL, "sent ack\n");
+ }
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+static void vhba_xsmp_boot_msg(struct work_struct *work)
+{
+ struct xsvhba_work *xwork =
+ container_of(work, struct xsvhba_work, work);
+
+ struct vhba_boot_info *msg = (struct vhba_boot_info *)xwork->msg;
+ struct virtual_hba *vhba;
+ struct scsi_xg_vhba_host *ha = NULL;
+ struct os_tgt *tq = NULL;
+ int i, x = 0;
+ int tgt;
+ union xg_tgt_wwpn boot_xwwpn;
+ union xg_tgt_wwpn mount_xwwpn;
+
+ vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+ if (vhba == NULL)
+ goto out;
+
+ ha = vhba->ha;
+
+ ha->boot_count = msg->boot_count;
+ ha->mount_count = msg->mount_count;
+ ha->mount_type = msg->mount_type;
+
+ dprintk(TRC_XSMP, vhba,
+ "Boot count = %d\t ", ha->boot_count);
+ dprintk(TRC_XSMP, vhba, "Mount count = %d\tMount type = %d\n",
+ ha->mount_count, ha->mount_type);
+
+ for (i = 0; i < ha->boot_count; i++) {
+ memset(&ha->sanboot[i], 0,
+ sizeof(struct host_san_vhba_list_sts));
+
+ memcpy(ha->sanboot[i].vh_name, msg->boot_devlist[i].vh_name,
+ VHBA_NAME_LEN);
+ ha->sanboot[i].wwn = msg->boot_devlist[i].wwn;
+ ha->sanboot[i].lun = msg->boot_devlist[i].lun;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (!tq)
+ continue;
+ else {
+ for (x = 0; x < WWN_SIZE; x++)
+ boot_xwwpn.wwpn_t[7 - x] =
+ tq->fcport->port_name[x];
+ }
+ dprintk(TRC_XSMP, NULL,
+ "Boot (local target WWN) WWN = %Lx\n",
+ boot_xwwpn.wwpn_val);
+
+ if (tq && (boot_xwwpn.wwpn_val == ha->sanboot[i].wwn)) {
+ dprintk(TRC_XSMP, NULL,
+ "Found a WWN match for a valid target\n");
+ ha->sanboot[i].tgt_num =
+ tq->fcport->os_target_id;
+ }
+ }
+
+ dprintk(TRC_XSMP, vhba, "Boot device # %d\n", i);
+ dprintk(TRC_XSMP, vhba,
+ "vh_name: %s\tWWPN:0x%llx\t Lun: 0x%x\n",
+ ha->sanboot[i].vh_name, ha->sanboot[i].wwn,
+ ha->sanboot[i].lun);
+ }
+
+ for (i = 0; i < ha->mount_count; i++) {
+ memcpy(&ha->sanmount[i].vh_name,
+ &msg->mount_devlist[i].vh_name, VHBA_NAME_LEN);
+ ha->sanmount[i].wwn = msg->mount_devlist[i].wwn;
+ ha->sanmount[i].lun = msg->mount_devlist[i].lun;
+
+ for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+ tq = TGT_Q(ha, tgt);
+ if (!tq)
+ continue;
+ else {
+ for (x = 0; x < WWN_SIZE; x++)
+ mount_xwwpn.wwpn_t[7 - x] =
+ tq->fcport->port_name[x];
+ }
+ dprintk(TRC_XSMP, NULL,
+ "Mount(local target WWN) WWN = %Lx\n",
+ mount_xwwpn.wwpn_val);
+
+ if (tq &&
+ (mount_xwwpn.wwpn_val == ha->sanmount[i].wwn)) {
+ ha->sanmount[i].tgt_num =
+ tq->fcport->os_target_id;
+
+ dprintk(TRC_XSMP, NULL,
+ "Found a WWPN match for a valid target");
+ dprintk(TRC_XSMP, NULL,
+ "Tgt id = %d (%d)\n",
+ ha->sanmount[i].tgt_num,
+ tq->fcport->os_target_id);
+ }
+
+ }
+
+ dprintk(TRC_XSMP, vhba, "Mount device # %d\n", i);
+ dprintk(TRC_XSMP, vhba,
+ "vh_name: %s\tWWPN:0x%Lx\t ",
+ (char *)ha->sanmount[i].vh_name, ha->sanmount[i].wwn);
+ dprintk(TRC_XSMP, vhba, "Lun: 0x%x\n", ha->sanmount[i].lun);
+ }
+
+ if (ha->mount_type == 1) {
+ memcpy(ha->host_lvm_info.logical_vol_group,
+ msg->logical_vol_group, VHBA_LVM_NAME_LEN);
+
+ memcpy(ha->host_lvm_info.logical_vol, msg->logical_vol,
+ VHBA_LVM_NAME_LEN);
+
+ dprintk(TRC_XSMP, vhba,
+ "Msg: Logical vol group: %s\tLogical vol = %s\n",
+ msg->logical_vol_group, msg->logical_vol);
+
+ } else if (ha->mount_type == 2) {
+ memcpy(ha->direct_mount_dev, msg->direct_mount_dev,
+ VHBA_LVM_NAME_LEN);
+
+ dprintk(TRC_XSMP, NULL, "Direct mount device = %s\n",
+ (char *)ha->direct_mount_dev);
+ }
+
+ memcpy(ha->mount_options, msg->mount_options, VHBA_MOUNT_OPT_LEN);
+
+ dprintk(TRC_XSMP, NULL, "Mount options = %s\n",
+ (char *)ha->mount_options);
+
+ vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg,
+ sizeof(struct vhba_boot_info));
+
+ DEC_REF_CNT(vhba);
+out:
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+/* The interface function used by the XCPM to deliver messages */
+static int vhba_xsmp_msg_handler(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+ struct xsvhba_work *vhba_work;
+ void *xsmp_msg;
+ u8 *msg;
+ int type = 0;
+ int boot_type;
+ int ret = 0;
+
+ dprintk(TRC_XSMP, NULL, "New message, length <%d>\n", length);
+
+ vhba_work = kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+ if (!vhba_work) {
+ eprintk(NULL, "vhba_work kmalloc failed\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (length < sizeof(struct xsmp_message_header)) {
+ eprintk(NULL, "Error - Message too short. Returning -1\n");
+ kfree(vhba_work);
+ ret = -1;
+ goto out;
+ }
+
+ if (vhba_swap_bytes(N_TO_H, data)) {
+ eprintk(NULL,
+ "Errors in the received message, dropping it. ");
+ eprintk(NULL, "Returning -1\n");
+ kfree(vhba_work);
+ ret = -1;
+ goto out;
+ }
+
+ if (*(u8 *) data != XSMP_MESSAGE_TYPE_VHBA) {
+ eprintk(NULL,
+ "Error - Wrong message type, not a VHBA message. ");
+ eprintk(NULL, "Returning -1\n");
+ kfree(vhba_work);
+ ret = -1;
+ goto out;
+ }
+
+ if (*(u16 *) (data + 2) != length) {
+ dprintk(TRC_XSMP, NULL,
+ "Warning - lengths are not the same, ");
+ dprintk(TRC_XSMP, NULL, "header: 0x%x, actual: 0x%x\n",
+ *(u16 *) (data + 2), length);
+ }
+
+ msg = data + sizeof(struct xsmp_message_header);
+ length -= sizeof(struct xsmp_message_header);
+
+ boot_type = *msg;
+
+ if (boot_type == XSMP_VHBA_BOOT_INFO)
+ xsmp_msg = kmalloc(sizeof(struct vhba_boot_info), GFP_ATOMIC);
+ else
+ xsmp_msg = kmalloc(sizeof(struct vhba_xsmp_msg), GFP_ATOMIC);
+
+ if (!xsmp_msg) {
+ eprintk(NULL, "xsmp msg kmalloc failed\n");
+ kfree(vhba_work);
+ ret = -1;
+ goto out;
+ }
+
+ if (boot_type == XSMP_VHBA_BOOT_INFO)
+ memcpy(xsmp_msg, msg, sizeof(struct vhba_boot_info));
+ else
+ memcpy(xsmp_msg, msg, sizeof(struct vhba_xsmp_msg));
+
+ type = *(u8 *) xsmp_msg;
+ vhba_work->xsmp_hndl = xsmp_hndl;
+ vhba_work->msg = xsmp_msg;
+ vhba_work->len = length;
+ vhba_xsmp_stats.last_msg = type;
+
+ vhba_handle_xsmp_msg(type, vhba_work);
+
+out:
+ kfree(data);
+ return ret;
+}
+
+/* The interface functions exported to the XCPM as callbacks */
+void vhba_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+ vhba_xsmp_msg_handler(xsmp_hndl, data, length);
+}
+
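+/*
+ * Byte-swap an XSMP message between host and network order in place.
+ * Stats and boot-info payloads have their own layouts; anything else
+ * is treated as an array of struct vhba_xsmp_msg entries following the
+ * XSMP header.
+ */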
+static int vhba_swap_bytes(int direction, u8 *msg)
+{
+ int rem_length = 0;
+ int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+ int num_messages = 0;
+ int count = 0;
+ int type = 0;
+ int i = 0;
+
+ if (direction == N_TO_H && (*(u8 *) msg == XSMP_MESSAGE_TYPE_VHBA))
+ rem_length = ntohs(*(u16 *) (msg + 2));
+ else if (direction == H_TO_N && (*(u8 *) msg == XSMP_MESSAGE_TYPE_VHBA))
+ rem_length = *(u16 *) (msg + 2);
+ else {
+ eprintk(NULL,
+ "Error - Hdr type is not a VHBA message. Returning -1\n");
+ return -1;
+ }
+
+ if (direction == H_TO_N) {
+ dprintk(TRC_XSMP, NULL,
+ "Sending message: type <0x%x>, ", *(u16 *) (msg));
+ dprintk(TRC_XSMP, NULL, "length <0x%x>\n", *(u16 *) (msg + 2));
+ }
+
+ if (direction == N_TO_H) {
+ dprintk(TRC_XSMP, NULL,
+ "Message received: XSMP type <0x%x>, ",
+ *(u8 *) (msg));
+ dprintk(TRC_XSMP, NULL, "length <0x%x>, sequence_number <0x%x>\n",
+ htons(*(u16 *) (msg + 2)),
+ htonl(*(u32 *) (msg + 4)));
+ }
+
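+ /*
+ * The wire header is assumed to be: one type byte (plus one unused
+ * byte), a 16-bit length, a 32-bit sequence number and 24 bytes of
+ * source/destination IDs, i.e. the 32-byte
+ * struct xsmp_message_header skipped below.
+ */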
+ /* Swizzle the header first */
+ msg += 2; /* Type */
+ *(u16 *) msg = htons(*(u16 *) msg); /* Length */
+ msg += 2;
+ *(u32 *) msg = htonl(*(u32 *) msg); /* Sequence number */
+ msg += 4;
+
+ /* Skip the source and destination IDs */
+ msg += 24;
+
+ rem_length -= sizeof(struct xsmp_message_header);
+
+ dprintk(TRC_XSMP, NULL,
+ "Msg payload length %d", rem_length);
+ dprintk(TRC_XSMP, NULL, " vhba_xsmp_length %d\n",
+ vhba_xsmp_length);
+
+ type = *(u8 *) (msg);
+ if (type == XSMP_VHBA_STATS) {
+ struct _vhba_stats *pstats = (struct _vhba_stats *)msg;
+
+ dprintk(TRC_XSMP, NULL, "received a stats message\n");
+ if (direction == N_TO_H) {
+ pstats->length = htons(pstats->length);
+ dprintk(TRC_XSMP, NULL, "length %d\n", pstats->length);
+ dprintk(TRC_XSMP, NULL,
+ "vid before (%llX)\n", pstats->vid);
+ pstats->vid = htonq(pstats->vid);
+ dprintk(TRC_XSMP, NULL,
+ "vid after (%llX)\n", pstats->vid);
+ } else if (direction == H_TO_N) {
+ pstats->vid = htonq(pstats->vid);
+ dprintk(TRC_XSMP, NULL,
+ "vid exit (%llX)\n", pstats->vid);
+ }
+ dprintk(TRC_XSMP, NULL, "action = %d", pstats->action);
+ return 0;
+ }
+
+ if (type == XSMP_VHBA_BOOT_INFO) {
+ struct vhba_boot_info *pboot = (struct vhba_boot_info *)msg;
+
+ dprintk(TRC_XSMP, NULL, "received a boot message\n");
+ if (direction == N_TO_H) {
+
+ pboot->length = ntohs(pboot->length);
+ pboot->resource_id = ntohq(pboot->resource_id);
+ pboot->boot_count = ntohs(pboot->boot_count);
+
+ for (i = 0; i < pboot->boot_count; i++) {
+ pboot->boot_devlist[i].wwn =
+ ntohq(pboot->boot_devlist[i].wwn);
+ dprintk(TRC_XSMP, NULL,
+ "WWN = %llx (%Lx)\n",
+ pboot->boot_devlist[i].wwn,
+ pboot->boot_devlist[i].wwn);
+ pboot->boot_devlist[i].lun =
+ ntohs(pboot->boot_devlist[i].lun);
+ dprintk(TRC_XSMP, NULL, "lun = %d\n",
+ pboot->boot_devlist[i].lun);
+ }
+
+ pboot->mount_type = ntohs(pboot->mount_type);
+ pboot->mount_count = ntohs(pboot->mount_count);
+
+ for (i = 0; i < pboot->mount_count; i++) {
+ dprintk(TRC_XSMP, NULL, "VHBA name = %s\n",
+ (char *)(pboot->mount_devlist[i].
+ vh_name));
+ pboot->mount_devlist[i].wwn =
+ ntohq(pboot->mount_devlist[i].wwn);
+ dprintk(TRC_XSMP, NULL, "WWN = %llx (%Lx)\n",
+ pboot->mount_devlist[i].wwn,
+ pboot->mount_devlist[i].wwn);
+ pboot->mount_devlist[i].lun =
+ ntohs(pboot->mount_devlist[i].lun);
+ dprintk(TRC_XSMP, NULL, "lun = %d\n",
+ pboot->mount_devlist[i].lun);
+
+ }
+ } else if (direction == H_TO_N) {
+ dprintk(TRC_XSMP, NULL,
+ "Host to network message. Doing nothing for now\n");
+ }
+
+ return 0;
+ }
+
+ if (rem_length % vhba_xsmp_length != 0) {
+ eprintk(NULL,
+ "Error - payload length (%d) is not a multiple of the VHBA message size (%d)\n",
+ rem_length, vhba_xsmp_length);
+ return -1;
+ }
+
+ num_messages = rem_length / vhba_xsmp_length;
+
+ for (count = 0; count < num_messages; count++) {
+ struct vhba_xsmp_msg *payload = (struct vhba_xsmp_msg *)msg;
+
+ if (rem_length == 0)
+ return 0;
+
+ payload->length = htons(payload->length);
+ payload->bit_mask = htonl(payload->bit_mask);
+ payload->resource_id = htonq(payload->resource_id);
+
+ payload->vhba_flag = htons(payload->vhba_flag);
+ payload->mtu = htonl(payload->mtu);
+ payload->tapesupport = htons(payload->tapesupport);
+ payload->bandwidth = htons(payload->bandwidth);
+ payload->interruptdelaytimer =
+ htonl(payload->interruptdelaytimer);
+ payload->executionthrottle = htonl(payload->executionthrottle);
+ payload->scsiqueuedepth = htonl(payload->scsiqueuedepth);
+ payload->linkdowntimeout = htonl(payload->linkdowntimeout);
+
+ payload->adminstate = htonl(payload->adminstate);
+ payload->enabletargetreset = htonl(payload->enabletargetreset);
+ payload->maxlunspertarget = htonl(payload->maxlunspertarget);
+
+ msg += vhba_xsmp_length;
+ }
+ return 0;
+}
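+
+/*
+ * For reference, the header layout implied by the pointer arithmetic in
+ * vhba_swap_bytes() above (struct xsmp_message_header is the
+ * authoritative definition):
+ *
+ * u8 type; - not byte-swapped
+ * u8 code; - not byte-swapped
+ * u16 length; - swapped with htons()
+ * u32 sequence_number; - swapped with htonl()
+ * u8 ids[24]; - source/destination IDs, left untouched
+ *
+ * The VHBA payload that follows is then swapped field by field.
+ */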
+
+void vhba_handle_xsmp_msg(int type, struct xsvhba_work *vhba_work)
+{
+
+ switch (type) {
+ case XSMP_VHBA_INSTALL:{
+ dprintk(TRC_XSMP, NULL,
+ "Received XSMP_VHBA_INSTALL msg\n");
+ vhba_xsmp_stats.install_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_xsmp_create);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_DELETE:{
+ dprintk(TRC_XSMP, NULL,
+ "Received XSMP_VHBA_DELETE msg\n");
+ vhba_xsmp_stats.delete_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_xsmp_delete);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_UPDATE:{
+ dprintk(TRC_XSMP, NULL,
+ "Received XSMP_VHBA_UPDATE msg\n");
+ vhba_xsmp_stats.update_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_xsmp_modify);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_STATS:{
+ dprintk(TRC_XSMP, NULL,
+ "Received XSMP_VHBA_STATS msg\n");
+ INIT_WORK(&vhba_work->work, vhba_xsmp_stats_req);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ vhba_xsmp_stats.cfg_stats_msg_cnt++;
+ break;
+ }
+
+ case XSMP_VHBA_SYNC_BEGIN:{
+ dprintk(TRC_XSMP, NULL,
+ "SYNC: Received XSMP_VHBA_SYNC_BEGIN msg\n");
+ vhba_xsmp_stats.sync_begin_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_sync_begin);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_SYNC_END:{
+ dprintk(TRC_XSMP, NULL,
+ "SYNC: Received XSMP_VHBA_SYNC_END msg\n");
+ vhba_xsmp_stats.sync_end_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_sync_end);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_OPER_REQ:{
+ dprintk(TRC_XSMP, NULL,
+ "SYNC: Received XSMP_VHBA_OPER_REQ\n");
+ vhba_xsmp_stats.oper_req_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_xsmp_handle_oper_req);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ case XSMP_VHBA_BOOT_INFO:{
+ dprintk(TRC_XSMP, NULL,
+ "Received XSMP_VHBA_BOOT_INFO msg\n");
+ vhba_xsmp_stats.boot_msg_cnt++;
+ INIT_WORK(&vhba_work->work, vhba_xsmp_boot_msg);
+ queue_work(vhba_workqueuep, &vhba_work->work);
+ break;
+ }
+
+ default:{
+ dprintk(TRC_XSMP, NULL,
+ "Warning - Invalid session message type %d, dropping it\n",
+ type);
+ vhba_xsmp_stats.unknown_msg_cnt++;
+ vhba_xsmp_stats.last_unknown_msg = type;
+ /* Unknown type: release the message buffer as well */
+ kfree(vhba_work->msg);
+ kfree(vhba_work);
+ }
+ }
+}
+
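+/*
+ * vhba_create_context() - remember a new vhba in the global list/idr.
+ *
+ * Returns 1 when a new context was created, 0 when a vhba with the same
+ * resource_id already exists (its sync flag is cleared instead), and -1
+ * if the idr allocation fails.
+ */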
+int vhba_create_context(struct vhba_xsmp_msg *msg, struct virtual_hba *vhba)
+{
+ int idr;
+ int ret = 0;
+ unsigned long flags = 0;
+ struct virtual_hba *t_vhba;
+ u64 resource_id = msg->resource_id;
+
+ write_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry(t_vhba, &vhba_g.list, list) {
+ if (t_vhba->resource_id == resource_id) {
+ /*
+ * Already in the list, may have been due to sync-begin
+ * operation. Reset the sync flag and return
+ */
+ dprintk(TRC_INFO, t_vhba,
+ "vhba already in the list: vid 0x%Lx\n",
+ t_vhba->resource_id);
+ t_vhba->sync_needed = 0;
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*
+ * Called under vhba_global_lock with interrupts disabled, so the
+ * allocation must not sleep.
+ */
+ idr = idr_alloc(&vhba_idr_table, (void *)vhba, vhba_current_idr,
+ vhba_current_idr + 1, GFP_ATOMIC);
+ if (idr < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ vhba->idr = idr;
+ vhba->resource_id = resource_id;
+ vhba_current_idr = idr + 1;
+ ret = 1;
+
+out:
+ write_unlock_irqrestore(&vhba_global_lock, flags);
+ return ret;
+}
+
+void vhba_add_context(struct virtual_hba *vhba)
+{
+ unsigned long flags = 0;
+
+ atomic_inc(&vhba->ref_cnt);
+ write_lock_irqsave(&vhba_global_lock, flags);
+ list_add_tail(&vhba->list, &vhba_g.list);
+ write_unlock_irqrestore(&vhba_global_lock, flags);
+}
+
+struct virtual_hba *vhba_remove_context(u64 resource_id)
+{
+ struct virtual_hba *vhba = NULL;
+ unsigned long flags = 0;
+
+ write_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ if (vhba->resource_id == resource_id)
+ goto out;
+ }
+ write_unlock_irqrestore(&vhba_global_lock, flags);
+ return NULL;
+out:
+ idr_remove(&vhba_idr_table, vhba->idr);
+ atomic_dec(&vhba->ref_cnt);
+ list_del(&vhba->list);
+ write_unlock_irqrestore(&vhba_global_lock, flags);
+ return vhba;
+}
+
+struct virtual_hba *vhba_get_context_by_resource_id(u64 resource_id)
+{
+ struct virtual_hba *vhba = NULL;
+ unsigned long flags = 0;
+
+ read_lock_irqsave(&vhba_global_lock, flags);
+ list_for_each_entry(vhba, &vhba_g.list, list) {
+ if (vhba->resource_id == resource_id)
+ goto out;
+ }
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+ return NULL;
+out:
+ atomic_inc(&vhba->ref_cnt);
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+ return vhba;
+}
+
+struct virtual_hba *vhba_get_context_by_idr(u32 idr)
+{
+ struct virtual_hba *vhba;
+ unsigned long flags = 0;
+
+ read_lock_irqsave(&vhba_global_lock, flags);
+ vhba = idr_find(&vhba_idr_table, idr);
+ if (vhba)
+ atomic_inc(&vhba->ref_cnt);
+
+ read_unlock_irqrestore(&vhba_global_lock, flags);
+ return vhba;
+}
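+
+/*
+ * Usage sketch (illustrative only): both lookup helpers above take a
+ * reference on success, so a caller is expected to drop it when done,
+ * e.g.
+ *
+ * vhba = vhba_get_context_by_resource_id(resource_id);
+ * if (vhba) {
+ * ... use vhba ...
+ * DEC_REF_CNT(vhba);
+ * }
+ *
+ * DEC_REF_CNT is the drop helper used by the XSMP work handlers in this
+ * file.
+ */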
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_XSMP_H__
+#define __VHBA_XSMP_H__
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/workqueue.h>
+
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "vhba_os_def.h"
+
+extern int vhba_xsmp_init(void);
+extern void vhba_xsmp_exit(void);
+extern struct fc_function_template vhba_transport_functions;
+
+#define XSMP_VHBA_INSTALL 1
+#define XSMP_VHBA_DELETE 2
+#define XSMP_VHBA_UPDATE 3
+#define XSMP_VHBA_ADMIN_UP 4
+#define XSMP_VHBA_ADMIN_DOWN 5
+#define XSMP_VHBA_OPER_UP 6
+#define XSMP_VHBA_OPER_DOWN 7
+#define XSMP_VHBA_OPER_READY 8
+#define XSMP_VHBA_STATS_REQ 9
+#define XSMP_VHBA_STATS 10
+#define XSMP_VHBA_SYNC_BEGIN 11
+#define XSMP_VHBA_SYNC_END 12
+#define XSMP_VHBA_INFO_REQUEST 13
+#define XSMP_VHBA_OPER_REQ 14
+#define XSMP_VHBA_BOOT_INFO 15
+#define XSMP_VHBA_TYPE_MAX 16
+
+#define VHBA_PORT_RATE_CHANGED 0x1
+#define VHBA_TAPE_SUPPORT_CHANGED 0x2
+#define VHBA_IDT_CHANGED 0x4
+#define VHBA_ET_CHANGED 0x8
+#define VHBA_SCSI_Q_DPTH_CHANGED 0x10
+#define VHBA_LDT_CHANGED 0x20
+#define VHBA_ADMINSTATE_CHANGED 0x100
+#define VHBA_TGT_RESET_CHANGED 0x40
+#define VHBA_LUNS_PER_TGT_CHANGED 0x80
+
+#define ADMINSTATE_DOWN 0x0
+#define ADMINSTATE_UP 0x1
+
+#define MAX_NUM_LINKS 32
+enum vhba_xsmp_error_codes {
+ VHBA_NACK_INVALID, /* 0 */
+ VHBA_NACK_DUP_NAME, /* 1 */
+ VHBA_NACK_DUP_VID, /* 2 */
+ VHBA_NACK_LIMIT_REACHED, /* 3 */
+ VHBA_NACK_ALLOC_ERROR, /* 4 */
+ VHBA_NACK_INVALID_STATE, /* 5 */
+ VHBA_NACK_DEVICE_BUSY, /* 6 */
+
+ VHBA_NACK_INS_APP_TIMEOUT, /* 7 */
+ VHBA_NACK_UNINST_APP_TIMEOUT, /* 8 */
+ VHBA_NACK_INS_APP_ERROR, /* 9 */
+ VHBA_NACK_UNINS_APP_ERROR, /* 10 */
+ VHBA_NACK_GENERAL_ERROR, /* 11 */
+ VHBA_NACK_LOCAL_DISABLED, /* 12 */
+
+ VHBA_NACK_HA_GROUP_NAME_MISMATCH, /* 13 */
+ VHBA_NACK_HA_MAC_ADDRESS_MISMATCH, /* 14 */
+ VHBA_NACK_HA_MTU_SIZE_MISMATCH, /* 15 */
+
+ VHBA_NACK_LA_GROUP_NAME_MISMATCH, /* 16 */
+ VHBA_NACK_LA_MAC_ADDRESS_MISMATCH, /* 17 */
+ VHBA_NACK_LA_MTU_SIZE_MISMATCH, /* 18 */
+ VHBA_NACK_LA_POLICY_MISMATCH, /* 19 */
+
+ VHBA_NACK_CODE_MAX, /* 20 */
+};
+
+/* Ack and Nack sent out in the 'code' field */
+#define XSMP_VHBA_ACK (1 << 6)
+#define XSMP_VHBA_NACK (1 << 7)
+
+#define H_TO_N 0
+#define N_TO_H 1
+
+#define ntohq be64_to_cpu
+#define htonq cpu_to_be64
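+
+/*
+ * ntohq()/htonq() are the 64-bit counterparts of ntohs()/htons(), used
+ * for the wide fields in the XSMP payloads, for example (both taken from
+ * vhba_swap_bytes()):
+ *
+ * pboot->resource_id = ntohq(pboot->resource_id);
+ * pstats->vid = htonq(pstats->vid);
+ */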
+#define VHBA_NAME_LEN 16
+#define VHBA_MAX_BOOT_DEV 6
+#define VHBA_MAX_MOUNT_DEV 6
+#define VHBA_LVM_NAME_LEN 128
+#define VHBA_MOUNT_OPT_LEN 32
+
+struct san_vhba_list_sts {
+ u8 vh_name[VHBA_NAME_LEN];
+ u64 wwn;
+ u16 lun;
+} __packed;
+
+struct vhba_boot_info {
+ /* standard header fields */
+ u8 type;
+ u8 code;
+ u16 length;
+
+ u64 resource_id;
+
+ /* Count of boot devices specified */
+ u16 boot_count;
+ struct san_vhba_list_sts boot_devlist[VHBA_MAX_BOOT_DEV];
+
+ u16 mount_type; /* 1 = use logical vol group, 0 = use vhba */
+ u8 logical_vol_group[VHBA_LVM_NAME_LEN];
+ u8 logical_vol[VHBA_LVM_NAME_LEN];
+ u8 direct_mount_dev[VHBA_LVM_NAME_LEN];
+ u8 mount_options[VHBA_MOUNT_OPT_LEN];
+
+ u16 mount_count; /* count of mount devices */
+ struct san_vhba_list_sts mount_devlist[VHBA_MAX_MOUNT_DEV];
+
+ /*
+ * Padding reserves u8s to make the V* message size = 960.
+ * If you add new variables to the structure,
+ * you should adjust the paddings
+ */
+ u8 reserved[214];
+} __packed;
+
+struct vhba_xsmp_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bit_mask;
+
+ u64 resource_id;
+ u64 wwn;
+ u64 tca_guid;
+
+ u16 tca_lid;
+ u16 vhba_flag;
+ u32 bandwidth;
+
+ u32 tapesupport;
+ u32 interruptdelaytimer;
+
+ u32 executionthrottle;
+ u32 scsiqueuedepth;
+
+ u32 linkdowntimeout;
+ u32 adminstate;
+
+ u32 enabletargetreset;
+ u32 maxlunspertarget;
+
+ u32 num_queues; /* Maximum 4 (0 .. 3) */
+ u8 vm_index;
+ u8 lunmask_enable;
+ u16 tca_slot;
+
+ u8 vh_name[VHBA_NAME_LEN];
+
+ struct {
+ /*
+ * Right now only one target,
+ * LUN combination per queue (default q 0)
+ * Actual rates are used only in I/O card side
+ */
+ u8 target[WWN_SIZE];
+ u32 lun;
+ } __packed q_classification[MAX_VHBA_QUEUES];
+
+ uint32_t mtu;
+
+ } __packed;
+ uint8_t bytes[512];
+ };
+} __packed;
+
+#define MAX_XSMP_MSG_SIZE sizeof(struct vhba_xsmp_msg)
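+
+/*
+ * The anonymous union above pads struct vhba_xsmp_msg to exactly 512
+ * bytes via the bytes[512] member.  A compile-time check such as
+ *
+ * BUILD_BUG_ON(sizeof(struct vhba_xsmp_msg) != 512);
+ *
+ * placed in the module init path would catch accidental growth when new
+ * fields are added (illustrative suggestion only).
+ */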
+
+struct _vhba_stats_config_msg {
+ u8 type;
+ u8 code;
+ u16 length;
+
+ u32 data_class_id;
+ u32 collection_interval;
+ u32 updatesper_interval;
+ u32 updatefrequency;
+
+ /*
+ * Padding reserves u8s to make the V* message size = 512.
+ * If you add new variables to the structure,
+ * you should adjust the paddings
+ */
+ u8 reserved[492];
+} __packed;
+
+union _stats_obj_union {
+ struct vhba_xsmp_msg gen_config;
+ struct _vhba_stats_config_msg stats_config;
+
+ /*
+ * Padding reserves u8s to make the V* message size = 512.
+ * If you add new variables to the structure,
+ * you should adjust the paddings
+ */
+ u8 reserved[368];
+} __packed;
+
+struct _vhba_stats {
+ u8 type; /* Stats type (MIMM stats id) */
+ u8 code; /* NACK reason */
+ u16 length;
+ u8 action; /* clear = 1, otherwise = get */
+ u8 reserv[3];
+ u64 vid;
+ u64 statscookie;
+ u64 totalio;
+ u64 readbytecount;
+ u64 writebytecount;
+ u64 outstandingrequestcount;
+ u64 iorequestcount;
+ u64 readrequestcount;
+ u64 writerequestcount;
+ u64 taskmanagementrequestcount;
+ u64 targetcount;
+ u64 luncount;
+ u64 xsmpxtdowncount;
+ u64 xsmpxtoperstaterequestcount;
+ u64 mapfmrcount;
+ u64 ummapfmrcount;
+ u64 usedmapfmrcount;
+ u64 abortcommandcount;
+ u64 resetluncommandcount;
+ u64 resettargetcommandcount;
+ u64 resetbuscommandcount;
+ u64 linkdowncount;
+ u64 discinfoupdatecount;
+ u64 targetlostcount;
+ u64 targetfoundcount;
+ u64 cqpdisconnectcount;
+ u64 dqpdisconnectcount;
+ u64 cqpibsenterrorcount;
+ u64 dqpibsenterrorcount;
+ u64 cqpibreceiveerrorcount;
+ u64 dqpibreceiverrrorcount;
+ u64 cqpibremotedisconnecterrorcount;
+ u64 dqpibremotedisconnecterrorcount;
+
+ /*
+ * Padding reserves u8s to make the V* message size = 512.
+ * If you add new variables to the structure,
+ * you should adjust the paddings
+ */
+ u8 reserved[240];
+} __packed;
+
+struct vhba_wq_msg {
+ struct work_struct *work;
+ u32 idr;
+ void *data;
+ struct ib_link_info *link;
+};
+
+extern void vhba_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *data,
+ int length);
+extern void vhba_abort_handler(xsmp_cookie_t xsmp_hndl);
+
+extern int vhba_xsmp_service_id;
+
+int vhba_create(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg);
+int vhba_delete(u64 resource_id);
+int vhba_update(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg);
+int vhba_config_stats(xsmp_cookie_t xsmp_hndl,
+ union _stats_obj_union *vhba_stats_cfg);
+
+int vhba_create_context(struct vhba_xsmp_msg *, struct virtual_hba *);
+void vhba_add_context(struct virtual_hba *);
+struct virtual_hba *vhba_remove_context(u64);
+struct virtual_hba *vhba_get_context_by_idr(u32);
+struct virtual_hba *vhba_get_context_by_resource_id(u64);
+int vhba_check_context(struct virtual_hba *);
+
+#define VHBA_XT_STATE_DOWN (0x40000000)
+#define VHBA_XT_INFO_CHANGE (0x80000000)
+
+extern int vhba_debug;
+extern unsigned long vhba_wait_time;
+extern struct vhba_xsmp_stats vhba_xsmp_stats;
+
+extern void vhba_xsmp_stats_req(struct work_struct *work);
+extern int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id,
+ int notifycmd);
+extern int vhba_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, u8 *data, int length);
+extern int vhba_xsmp_ack(xsmp_cookie_t xsmp_hndl, u8 *data, int length);
+int vhba_xsmp_nack(xsmp_cookie_t xsmp_hndl, u8 *data, int length,
+ enum vhba_xsmp_error_codes);
+int stop_stats_collection(void);
+int insert_iocb(struct virtual_hba *, int val, void **r_ptr);
+extern int vhba_register_xsmp_service(void);
+extern void vhba_unregister_xsmp_service(void);
+extern void vhba_handle_xsmp_msg(int, struct xsvhba_work *vhba_work);
+
+#define DEBUG 1
+
+#define TRC_ERRORS 0x000001
+#define TRC_INIT 0x000002
+#define TRC_XSMP 0x000004
+#define TRC_XSMP_ERRS 0x000008
+#define TRC_IB 0x000010
+#define TRC_IB_ERRS 0x000020
+#define TRC_SCSI 0x000040
+#define TRC_SCSI_ERRS 0x000080
+#define TRC_FMR 0x000100
+#define TRC_FMR_ERRS 0x000200
+#define TRC_IO 0x000400
+#define TRC_UNALIGNED 0x000800
+#define TRC_PROC 0x001000
+#define TRC_ERR_RECOV 0x002000
+#define TRC_TIMER 0x004000
+#define TRC_CQP 0x008000
+#define TRC_SCAN 0x010000
+#define TRC_MGMT 0x020000
+#define TRC_STATS 0x040000
+#define TRC_FUNCS 0x080000
+#define TRC_WQ 0x100000
+#define TRC_INFO 0x200000
+
+#ifdef DEBUG
+#define eprintk(vhba, fmt, args...) \
+{ \
+ struct virtual_hba *v_hba = (struct virtual_hba *)vhba; \
+ if (v_hba != NULL) { \
+ if ((v_hba->cfg) && (v_hba->cfg->vh_name)) \
+ pr_info("<vhba %s> %s: " fmt, \
+ (char *) (v_hba->cfg->vh_name), \
+ __func__ , ## args); \
+ } else { \
+ pr_info("%s: " fmt, __func__ , ## args); \
+ } \
+}
+#else
+#define eprintk(vhba, fmt, args...)
+#endif
+
+#ifdef DEBUG
+#define dprintk(level, vhba, fmt, args...) \
+do { \
+ struct virtual_hba *v_hba = (struct virtual_hba *)vhba; \
+ if ((vhba_debug & level) == level) { \
+ if (v_hba != NULL) { \
+ if ((v_hba->cfg) && (v_hba->cfg->vh_name)) \
+ pr_info("<vhba %s> %s: " fmt, \
+ (char *) (v_hba->cfg->vh_name), \
+ __func__ , ## args); \
+ } else { \
+ pr_info("%s: " fmt, __func__ \
+ , ## args); \
+ } \
+ } \
+} while (0)
+
+#define vhba_debug(level, vhba, fmt, args...) \
+do { \
+ struct virtual_hba *v_hba = (struct virtual_hba *)vhba; \
+ if ((vhba_debug & level) == level) { \
+ if (v_hba != NULL) { \
+ if ((v_hba->cfg) && (v_hba->cfg->vh_name)) \
+ pr_info("<vhba %s> %32s: " fmt,\
+ (char *)(v_hba->cfg->vh_name), \
+ __func__ , ## args); \
+ } else { \
+ pr_info("%s: " fmt, __func__, \
+ ## args); \
+ } \
+ } \
+} while (0)
+#else
+#define dprintk(level, vhba, fmt, args...)
+#endif
+
+#define assert(expr) \
+do { \
+ if (!(expr)) { \
+ pr_info("Assertion failed! %s,%s,%s,line=%d\n", \
+ #expr, __FILE__, __func__, __LINE__); \
+ } \
+} while (0)
+
+int vhba_purge_pending_ios(struct virtual_hba *vhba);
+
+#endif /* __VHBA_XSMP_H__ */
--- /dev/null
+config INFINIBAND_XSVNIC
+ tristate "Xsigo Virtual NIC"
+ depends on INFINIBAND_XSCORE
+ ---help---
+ Support for the Xsigo vNIC Functionality.
--- /dev/null
+obj-$(CONFIG_INFINIBAND_XSVNIC) := xsvnic.o
+xsvnic-y := xsvnic_main.o xsvnic_stats.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSVNIC_H__
+#define __XSVNIC_H__
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/inet_lro.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "xsvnic_xsmp_msgs.h"
+#include "xsmp_session.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define XSVNIC_DRIVER_VERSION "0.31"
+#else
+#define XSVNIC_DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+#define XSVNIC_MIN_PACKET_LEN 60
+#define XSVNIC_MAX_BUF_SIZE 1024
+#define XSVNIC_MACLIST_MAX 128
+#define TCA_SERVICE_ID 0x1001ULL
+#define XSVNIC_VLANLIST_MAX 500
+#define XS_RXBAT_HDRLEN 4
+#define RXBAT_FORMAT_OFFSET(a) ((a >> 30) & 0x3)
+#define RXBAT_FINAL_BIT(a) ((a >> 29) & 0x1)
+#define RXBAT_FRAG_LEN(a) (a & 0x3fff)
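+
+/*
+ * Layout of the 4-byte RX-batching header decoded by the macros above
+ * (bit positions follow directly from the shifts and masks):
+ *
+ * bits 31:30 - format/offset code
+ * bit 29 - final-segment flag (1 = last packet in the batch)
+ * bits 13:0 - fragment length in bytes
+ *
+ * The remaining bits are not interpreted here.
+ */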
+
+#define GET_MAX(val, len) \
+ do { \
+ if ((val == 0) || ((len > val) && (len != 0))) \
+ val = len; \
+ } while (0)
+
+#define GET_MIN(val, len) \
+ do { \
+ if ((val == 0) || ((len < val) && (len != 0))) \
+ val = len; \
+ } while (0)
+
+#define CALC_MAX_PKT_RX(p, len) \
+ do { \
+ GET_MAX(p->counters[XSVNIC_RX_MAX_PKT], len); \
+ GET_MIN(p->counters[XSVNIC_RX_MIN_PKT], len); \
+ } while (0)
+
+#define CALC_MAX_PKT_TX(p, len) \
+ do { \
+ GET_MAX(p->counters[XSVNIC_TX_MAX_PKT], len); \
+ GET_MIN(p->counters[XSVNIC_TX_MIN_PKT], len); \
+ } while (0)
+
+#define CALC_MAX_MIN_TXTIME(p, time) \
+ do { \
+ unsigned long tot_time = (jiffies - time); \
+ GET_MAX(p->counters[XSVNIC_TX_MAX_TIME], tot_time); \
+ GET_MIN(p->counters[XSVNIC_TX_MIN_TIME], tot_time); \
+ } while (0)
+
+#define XSIGO_DUMP_PKT(a, b, c) \
+ do { \
+ if (xsvnic_debug & DEBUG_DUMP_PKTS) \
+ dumppkt(a, b, c); \
+ } while (0)
+
+#define XSIGO_DEVICE_PREFIX ""
+
+#define XSVNIC_IO_QP_TYPE_CONTROL 0
+#define XSVNIC_IO_QP_TYPE_DATA 1
+
+enum {
+ XSVNIC_CONN_INIT,
+ XSVNIC_CONN_CONNECTING,
+ XSVNIC_CONN_CONNECTED,
+ XSVNIC_CONN_DISCONNECTING,
+ XSVNIC_CONN_DISCONNECTED,
+ XSVNIC_CONN_ERROR
+};
+
+struct xsvnic_conn {
+ u8 type;
+ int state;
+ struct xscore_conn_ctx ctx;
+};
+
+/*
+ * Private data format passed in a connection request
+ */
+
+struct xt_cm_private_data {
+ u64 vid;
+ u16 qp_type;
+ u16 max_ctrl_msg_size;
+ u32 data_qp_type;
+#define XSVNIC_TSO_BIT (1 << 1)
+#define XSVNIC_RXBAT_BIT (1 << 2)
+#define XSVNIC_RXBAT_TIMER_BIT (1 << 3)
+} __packed;
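+
+/*
+ * The XSVNIC_*_BIT flags above are carried in data_qp_type of the CM
+ * private data; they appear to advertise TSO and RX-batching support to
+ * the remote side before the data QP is brought up (descriptive note,
+ * inferred from their use elsewhere in this driver).
+ */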
+
+struct xsvnic_control_msg {
+ u8 type;
+ u8 _reserved;
+ u16 length;
+ u32 data;
+} __packed;
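+
+/*
+ * Note on the length field: some senders in this driver byte-swap it
+ * (e.g. the VLAN list path uses cpu_to_be16()), while others leave it in
+ * host order on purpose for backward compatibility with the remote xvnd
+ * code -- see the "needs swapping" comments further on in this series.
+ */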
+
+/*lro specifics*/
+enum {
+ XSVNIC_MAX_LRO_DESCRIPTORS = 8,
+ XSVNIC_LRO_MAX_AGGR = 64,
+};
+
+/*
+ * Types for the control messages, events, and statistics
+ * sent using the 'struct xsvnic_control_msg' above
+ */
+enum xsvnic_control_msg_type {
+ XSVNIC_START_TX = 16,
+ XSVNIC_STOP_TX,
+ XSVNIC_START_RX,
+ XSVNIC_STOP_RX,
+ XSVNIC_RX_COALESCE_NUM_PACKETS,
+ XSVNIC_RX_COALESCE_MSECS,
+ XSVNIC_LINK_UP,
+ XSVNIC_LINK_DOWN,
+ XSVNIC_ASSIGN_IP,
+ XSVNIC_ASSIGN_VLAN,
+ XSVNIC_UNASSIGN_VLAN,
+ XSVNIC_STATS_REQUEST,
+ XSVNIC_STATS_RESPONSE,
+ XSVNIC_MAC_ADDRESS_REPORT,
+ XSVNIC_MULTICAST_LIST_SEND,
+ XSVNIC_START_RX_RESPONSE,
+ XSVNIC_VPORT_STATUS_UPDATE,
+ XSVNIC_MULTICAST_LIST_RESPONSE,
+ XSVNIC_HEART_BEAT,
+ MAX_XSVNIC_CTL_MSG_TYPE
+};
+
+struct xsvnic_start_rx_resp_msg {
+ u8 port_speed;
+};
+
+struct xsvnic_link_up_msg {
+ u8 port_speed;
+};
+
+enum xnic_bw {
+ XNIC_BW_0, /* link down state */
+ XNIC_BW_100MbPS,
+ XNIC_BW_10MbPS,
+ XNIC_BW_200MbPS,
+ XNIC_BW_500MbPS,
+ XNIC_BW_800MbPS,
+ XNIC_BW_1GbPS,
+ XNIC_BW_2GbPS,
+ XNIC_BW_3GbPS,
+ XNIC_BW_4GbPS,
+ XNIC_BW_5GbPS,
+ XNIC_BW_6GbPS,
+ XNIC_BW_7GbPS,
+ XNIC_BW_8GbPS,
+ XNIC_BW_9GbPS,
+ XNIC_BW_10GbPS,
+ XNIC_BW_UNKNOWN,
+};
+
+struct vlan_entry {
+ struct list_head vlan_list;
+ unsigned short vlan_id;
+};
+
+enum {
+ XSVNIC_SYNC_END_DEL_COUNTER,
+ XSVNIC_VNIC_INSTALL_COUNTER,
+ XSVNIC_VNIC_DEL_COUNTER,
+ XSVNIC_VNIC_DEL_NOVID_COUNTER,
+ XSVNIC_VNIC_UPDATE_COUNTER,
+ XSVNIC_VNIC_SYNC_BEGIN_COUNTER,
+ XSVNIC_VNIC_SYNC_END_COUNTER,
+ XSVNIC_VNIC_OPER_REQ_COUNTER,
+ XSVNIC_VNIC_UNSUP_XSMP_COUNTER,
+ XSVNIC_ISCSI_INFO_COUNTER,
+ XSVNIC_DEVICE_REMOVAL_COUNTER,
+ XSVNIC_MAX_GLOB_COUNTERS
+};
+
+enum {
+ XSVNIC_CTRL_HBEAT_COUNTER,
+ XSVNIC_DATA_HBEAT_COUNTER,
+ XSVNIC_HBEAT_ERR_COUNTER,
+ XSVNIC_NAPI_POLL_COUNTER,
+ XSVNIC_SHORT_PKT_COUNTER,
+ XSVNIC_TX_COUNTER,
+ XSVNIC_TX_SKB_TSO_COUNTER,
+ XSVNIC_TX_SKB_NOHEAD_COUNTER,
+ XSVNIC_TX_SKB_FREE_COUNTER,
+ XSVNIC_TX_SKB_FREE_COUNTER_REAP,
+ XSVNIC_TX_EXPAND_HEAD_COUNTER,
+ XSVNIC_TX_EXPAND_HEAD_ECNTR,
+ XSVNIC_TX_VLAN_COUNTER,
+ XSVNIC_TX_ERROR_COUNTER,
+ XSVNIC_TX_WRB_EXHAUST,
+ XSVNIC_TX_DROP_OPER_DOWN_COUNT,
+ XSVNIC_TX_SKB_ALLOC_ERROR_COUNTER,
+ XSVNIC_TX_EXPANDSKB_ERROR,
+ XSVNIC_TX_RING_FULL_COUNTER,
+ XSVNIC_RX_SKB_COUNTER,
+ XSVNIC_RX_SKB_ALLOC_COUNTER,
+ XSVNIC_RX_SENDTO_VLANGRP,
+ XSVNIC_RXBAT_PKTS,
+ XSVNIC_RX_SKB_FREE_COUNTER,
+ XSVNIC_RX_MAXBATED_COUNTER,
+ XSVNIC_RXBAT_BELOW_5SEGS,
+ XSVNIC_RXBAT_BTW_5_10SEGS,
+ XSVNIC_RXBAT_BTW_10_20SEGS,
+ XSVNIC_RXBAT_ABOVE_20SEGS,
+ XSVNIC_8KBAT_PKTS,
+ XSVNIC_RX_SKB_OFFLOAD_COUNTER,
+ XSVNIC_RX_SKB_OFFLOAD_FRAG_COUNTER,
+ XSVNIC_RX_SKB_OFFLOAD_NONIPV4_COUNTER,
+ XSVNIC_RX_ERROR_COUNTER,
+ XSVNIC_RX_QUOTA_EXCEEDED_COUNTER,
+ XSVNIC_RX_NOBUF_COUNTER,
+ XSVNIC_RX_MAX_PKT,
+ XSVNIC_RX_MIN_PKT,
+ XSVNIC_RX_LRO_AGGR_PKTS,
+ XSVNIC_RX_LRO_FLUSHED_PKT,
+ XSVNIC_RX_LRO_AVG_AGGR_PKTS,
+ XSVNIC_RX_LRO_NO_DESCRIPTORS,
+ XSVNIC_TX_MAX_PKT,
+ XSVNIC_TX_MIN_PKT,
+ XSVNIC_TX_MAX_TIME,
+ XSVNIC_TX_MIN_TIME,
+ XSVNIC_NAPI_SCHED_COUNTER,
+ XSVNIC_NAPI_NOTSCHED_COUNTER,
+ XSVNIC_PORT_LINK_UP_COUNTER,
+ XSVNIC_PORT_LINK_DOWN_COUNTER,
+ XSVNIC_DUP_PORT_LINK_UP_COUNTER,
+ XSVNIC_DUP_PORT_LINK_DOWN_COUNTER,
+ XSVNIC_START_RX_COUNTER,
+ XSVNIC_STOP_RX_COUNTER,
+ XSVNIC_START_RX_RESP_COUNTER,
+ XSVNIC_BAD_RX_RESP_COUNTER,
+ XSVNIC_OPEN_COUNTER,
+ XSVNIC_STOP_COUNTER,
+ XSVNIC_GETSTATS_COUNTER,
+ XSVNIC_SET_MCAST_COUNTER,
+ XSVNIC_MCAST_LIST_RESP_COUNTER,
+ XSVNIC_MCAST_LIST_NORESP_COUNTER,
+ XSVNIC_VLAN_RX_ADD_COUNTER,
+ XSVNIC_VLAN_RX_DEL_COUNTER,
+ XSVNIC_IOCTL_COUNTER,
+ XSVNIC_MAC_ADDR_CHNG,
+ XSVNIC_WDOG_TIMEOUT_COUNTER,
+ XSVNIC_OPER_REQ_COUNTER,
+ XSVNIC_XT_DOWN_COUNTER,
+ XSVNIC_XT_UPDATE_COUNTER,
+ XSVNIC_XT_LID_CHANGE_COUNTER,
+ XSVNIC_ADMIN_UP_COUNTER,
+ XSVNIC_ADMIN_DOWN_COUNTER,
+ XSVNIC_OPER_UP_STATE_COUNTER,
+ XSVNIC_QP_ERROR_COUNTER,
+ XSVNIC_IB_RECOVERY_COUNTER,
+ XSVNIC_IB_RECOVERED_COUNTER,
+ XSVNIC_IBLINK_DOWN_COUNTER,
+ XSVNIC_IBLINK_UP_COUNTER,
+ XSVNIC_CTRL_CONN_OK_COUNTER,
+ XSVNIC_CTRL_RDISC_COUNTER,
+ XSVNIC_CTRL_ERR_COUNTER,
+ XSVNIC_CTRL_RECV_ERR_COUNTER,
+ XSVNIC_DATA_CONN_OK_COUNTER,
+ XSVNIC_DATA_RDISC_COUNTER,
+ XSVNIC_DATA_ERR_COUNTER,
+ XSVNIC_SENT_OPER_UP_COUNTER,
+ XSVNIC_SENT_OPER_DOWN_COUNTER,
+ XSVNIC_SENT_OPER_STATE_FAILURE_COUNTER,
+ XSVNIC_SENT_OPER_STATE_SUCCESS_COUNTER,
+ XSVNIC_RX_DROP_STANDBY_COUNTER,
+ XSVNIC_TX_DROP_STANDBY_COUNTER,
+ XSVNIC_MAX_COUNTERS
+};
+
+struct ether_addr {
+ unsigned char addr[ETH_ALEN];
+};
+
+struct xsvnic_lro {
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[XSVNIC_MAX_LRO_DESCRIPTORS];
+};
+
+struct xsvnic {
+ spinlock_t lock;
+ struct mutex mutex;
+ atomic_t ref_cnt;
+ struct completion done;
+ struct delayed_work sm_work;
+ unsigned long state;
+#define XSVNIC_SYNC_DIRTY 1
+#define XSVNIC_OS_ADMIN_UP 2
+#define XSVNIC_CHASSIS_ADMIN_UP 3
+#define XSVNIC_DELETING 4
+#define XSVNIC_SEND_ADMIN_STATE 5
+#define XSVNIC_PORT_LINK_UP 6
+#define XSVNIC_START_RX_SENT 7
+#define XSVNIC_START_RESP_RCVD 8
+#define XSVNIC_OPER_UP 9
+#define XSVNIC_STOP_RX_SENT 10
+#define XSVNIC_XT_DOWN 11
+#define XSVNIC_XT_STATE_CHANGE 12
+#define XSVNIC_SHUTDOWN 13
+#define XSVNIC_MCAST_LIST_SENT 14
+#define XSVNIC_RING_SIZE_CHANGE 15
+#define XSVNIC_RX_NOBUF 16
+#define XSVNIC_INTR_ENABLED 17
+#define XSVNIC_TRIGGER_NAPI_SCHED 18
+#define XSVNIC_IBLINK_DOWN 19
+#define XSVNIC_MCAST_LIST_PENDING 20
+#define XSVNIC_MCAST_LIST_TIMEOUT 21
+#define XSVNIC_CHASSIS_ADMIN_SHADOW_UP 22
+#define XSVNIC_OVER_QUOTA 23
+#define XSVNIC_TSO_CHANGE 24
+#define XSVNIC_RXBATCH_CHANGE 25
+#define XSVNIC_STATE_STDBY 26
+ struct list_head xsvnic_list;
+ struct list_head vlan_list;
+ struct ether_addr *mc_addrs;
+ int mc_count;
+ struct net_device *netdev;
+ struct net_device_stats stats;
+ struct napi_struct napi;
+ u8 lro_mode;
+ struct xsvnic_lro lro;
+#define XSVNIC_RECLAIM_COUNT 4
+ int reclaim_count;
+ u8 send_hbeat_flag;
+ int vlan_count;
+ xsmp_cookie_t xsmp_hndl;
+ u64 tca_guid;
+ u16 tca_lid;
+ struct xsvnic_conn ctrl_conn;
+ struct xsvnic_conn data_conn;
+ u32 counters[XSVNIC_MAX_COUNTERS];
+ u64 resource_id;
+ u32 bandwidth;
+ u32 mtu;
+ u64 mac;
+ char vnic_name[XSVNIC_MAX_NAME_SIZE];
+ u8 sl;
+ u16 mp_flag;
+ u8 mp_group[XSVNIC_MAX_NAME_SIZE];
+ u32 install_flag;
+ int port_speed;
+ struct xsmp_session_info xsmp_info;
+ struct xsvnic_iscsi_info iscsi_boot_info;
+ u8 ha_state;
+ int rx_ring_size;
+ int tx_ring_size;
+ int *budget;
+ unsigned long jiffies;
+ int sm_delay;
+ u8 iff_promisc;
+ u16 counters_cleared;
+ int page_order;
+ int is_tso;
+ int is_rxbatching;
+ int is_rxbat_operational;
+ struct vlan_group *vlgrp;
+ struct proc_dir_entry *vnic_dir;
+ int ix;
+};
+
+struct xsvnic_work {
+ struct work_struct work;
+ xsmp_cookie_t xsmp_hndl;
+ struct xsvnic *xsvnicp;
+ u8 *msg;
+ int len;
+ int status;
+};
+
+extern int xsvnic_debug;
+extern unsigned long xsvnic_wait_time;
+extern struct mutex xsvnic_mutex;
+extern struct list_head xsvnic_list;
+extern u32 xsvnic_counters[];
+extern int xsvnic_vlanaccel;
+
+extern void xsvnic_remove_procfs_root_entries(void);
+extern int xsvnic_create_procfs_root_entries(void);
+extern int xsvnic_add_proc_entry(struct xsvnic *vp);
+extern void xsvnic_remove_proc_entry(struct xsvnic *vp);
+extern int xsvnic_change_rxbatch(struct xsvnic *xsvnicp, int flag);
+
+extern int check_rxbatch_possible(struct xsvnic *xsvnicp, int flag);
+void xsvnic_count_segs(struct xsvnic *xsvnicp, char nr_segs, int pkt_len);
+int xsvnic_align_addr(char **start);
+void xsvnic_send_skb(struct xsvnic *xsvnicp, struct sk_buff *skb,
+ int curr_pkt_len, char chksum_offload);
+
+#define MODULE_NAME "XSVNIC"
+
+enum {
+ DEBUG_DRV_INFO = 0x00000001,
+ DEBUG_DRV_FUNCTION = 0x00000002,
+ DEBUG_XSMP_INFO = 0x00000004,
+ DEBUG_XSMP_FUNCTION = 0x00000008,
+ DEBUG_IOCTRL_INFO = 0x00000010,
+ DEBUG_IOCTRL_FUNCTION = 0x00000020,
+ DEBUG_RXBAT_FUNCTION = 0x00000040,
+ DEBUG_DUMP_PKTS = 0x00000080,
+};
+
+static inline void dumppkt(unsigned char *pkt, unsigned short len, char *name)
+{
+ int i;
+ unsigned char *p = (unsigned char *)pkt;
+ char line[64] = { 0 };
+ char *cp = line;
+ char filter[] = "0123456789abcdef";
+
+ pr_info("%s DumpPacket of %d\n", name, len);
+
+ for (i = 0; i < len; i++) {
+ if ((i != 0) && (i % 8 == 0)) {
+ pr_info("%s\n", line);
+ memset(line, 0, sizeof(line));
+ cp = line;
+ }
+ *cp++ = filter[*p >> 4];
+ *cp++ = filter[*p++ & 0xf];
+ *cp++ = ':';
+ }
+ /* Flush the final (possibly partial) line, dropping the trailing ':' */
+ if (cp != line) {
+ *--cp = 0;
+ pr_info("%s\n", line);
+ }
+}
+
+#define PRINT(level, x, fmt, arg...) \
+ printk(level "%s: " fmt, MODULE_NAME, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...) \
+ do { \
+ if (condition) \
+ printk(level "%s: %s: "fmt, \
+ MODULE_NAME, x, ##arg); \
+ } while (0)
+
+#define DRV_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "DRV", fmt, ##arg)
+#define DRV_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "DRV", fmt, ##arg)
+
+#define DRV_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "DRV", \
+ (xsvnic_debug & DEBUG_DRV_FUNCTION), \
+ fmt, ##arg)
+
+#define DRV_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "DRV", \
+ (xsvnic_debug & DEBUG_DRV_INFO), \
+ fmt, ##arg)
+
+#define XSMP_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "XSMP", fmt, ##arg)
+#define XSMP_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+
+#define XSMP_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XSMP", \
+ (xsvnic_debug & DEBUG_XSMP_FUNCTION), \
+ fmt, ##arg)
+
+#define XSMP_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "XSMP", \
+ (xsvnic_debug & DEBUG_XSMP_INFO), \
+ fmt, ##arg)
+#define IOCTRL_PRINT(fmt, arg...) \
+ PRINT(KERN_INFO, "IOCTRL", fmt, ##arg)
+#define IOCTRL_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "IOCTRL", fmt, ##arg)
+
+#define IOCTRL_FUNCTION(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "IOCTRL", \
+ (xsvnic_debug & DEBUG_IOCTRL_FUNCTION), \
+ fmt, ##arg)
+
+#define IOCTRL_INFO(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "IOCTRL", \
+ (xsvnic_debug & DEBUG_IOCTRL_INFO), \
+ fmt, ##arg)
+#define IORXBAT_FUNC(fmt, arg...) \
+ PRINT_CONDITIONAL(KERN_INFO, \
+ "RXBAT", \
+ (xsvnic_debug & DEBUG_RXBAT_FUNCTION), \
+ fmt, ##arg)
+
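+/*
+ * Two variants of netdev_mc_list_copy() follow: older kernels expose the
+ * multicast list as struct dev_mc_list (dmi_addr), newer ones as struct
+ * netdev_hw_addr (addr).  NETDEV_HW_ADDR_T_MULTICAST is used as the
+ * feature test to pick the matching walker.
+ */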
+#if !defined(NETDEV_HW_ADDR_T_MULTICAST)
+
+static inline void netdev_mc_list_copy(struct xsvnic *xsvnicp)
+{
+ struct dev_mc_list *ha;
+ struct net_device *netdev = xsvnicp->netdev;
+ struct ether_addr *eaddr = xsvnicp->mc_addrs;
+
+ netdev_for_each_mc_addr(ha, netdev) {
+ ether_addr_copy(eaddr->addr, ha->dmi_addr);
+ eaddr++;
+ }
+}
+
+#else
+
+static inline void netdev_mc_list_copy(struct xsvnic *xsvnicp)
+{
+ struct netdev_hw_addr *ha;
+ struct net_device *netdev = xsvnicp->netdev;
+ struct ether_addr *eaddr = xsvnicp->mc_addrs;
+
+ netdev_for_each_mc_addr(ha, netdev) {
+ ether_addr_copy(eaddr->addr, ha->addr);
+ eaddr++;
+ }
+}
+
+#endif
+
+struct xs_vlan_header {
+ u32 tso_info;
+} __packed;
+
+struct xs_tso_header {
+ u32 tso_info;
+} __packed;
+
+struct xs_tsovlan_header {
+ u32 tso_info;
+ u32 vlan_info;
+} __packed;
+
+#endif /* __XSVNIC_H__ */
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include <linux/mii.h>
+#include <linux/tcp.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include "xsvnic.h"
+#include "xscore.h"
+#include <xs_compat.h>
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN XSVNIC network driver");
+MODULE_VERSION(XSVNIC_DRIVER_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO NETIF_F_SW_LRO
+#endif
+static int napi_weight = 64;
+module_param(napi_weight, int, 0644);
+
+static int xsigo_session_service_id = -1;
+static int xsvnic_havnic = 1;
+module_param(xsvnic_havnic, int, 0644);
+
+int xsvnic_debug = 0x0;
+module_param(xsvnic_debug, int, 0644);
+
+static int xsvnic_force_csum_offload = 0x0;
+module_param(xsvnic_force_csum_offload, int, 0644);
+
+/*lro specifics*/
+int lro;
+static int lro_max_aggr = XSVNIC_LRO_MAX_AGGR;
+module_param(lro, int, 0444);
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
+MODULE_PARM_DESC(lro_max_aggr,
+ "LRO: Max packets to be aggregated (default = 64)");
+
+static int multicast_list_disable;
+module_param(multicast_list_disable, int, 0644);
+
+static int xsvnic_hbeat_enable = 2;
+module_param(xsvnic_hbeat_enable, int, 0644);
+
+int xsvnic_rxring_size = 256;
+module_param(xsvnic_rxring_size, int, 0444);
+
+int xsvnic_txring_size = 256;
+module_param(xsvnic_txring_size, int, 0444);
+
+int xsvnic_highdma;
+module_param(xsvnic_highdma, int, 0644);
+
+int xsvnic_vlanaccel;
+module_param(xsvnic_vlanaccel, int, 0644);
+
+int xsvnic_rxbatching = 1;
+module_param(xsvnic_rxbatching, int, 0644);
+
+int xsvnic_report_10gbps;
+module_param(xsvnic_report_10gbps, int, 0644);
+
+int xsvnic_reclaim_count = XSVNIC_RECLAIM_COUNT;
+module_param(xsvnic_reclaim_count, int, 0644);
+
+int xsvnic_tx_queue_len = 1000;
+module_param(xsvnic_tx_queue_len, int, 0644);
+
+int xsvnic_tx_intr_mode;
+module_param(xsvnic_tx_intr_mode, int, 0644);
+
+int xsvnic_max_coal_frames;
+module_param(xsvnic_max_coal_frames, int, 0644);
+
+int xsvnic_coal_usecs = 100;
+module_param(xsvnic_coal_usecs, int, 0644);
+
+int xsvnic_rx_intr_mode;
+module_param(xsvnic_rx_intr_mode, int, 0644);
+
+int xsvnic_wait_in_boot = 1;
+module_param(xsvnic_wait_in_boot, int, 0644);
+
+int xsvnic_wait_per_vnic = 30;
+module_param(xsvnic_wait_per_vnic, int, 0644);
+
+unsigned long xsvnic_wait_time;
+static int xsvnic_xsmp_service_id = -1;
+struct list_head xsvnic_list;
+static spinlock_t xsvnic_lock;
+struct mutex xsvnic_mutex;
+static struct workqueue_struct *xsvnic_wq;
+static struct workqueue_struct *xsvnic_io_wq;
+u32 xsvnic_counters[XSVNIC_MAX_GLOB_COUNTERS];
+
+static void queue_sm_work(struct xsvnic *xsvnicp, int msecs);
+static void _xsvnic_set_multicast(struct xsvnic *xsvnicp);
+static void xsvnic_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+ int len);
+static int xsvnic_remove_vnic(struct xsvnic *xsvnicp);
+static void xsvnic_send_cmd_to_xsigod(struct xsvnic *xsvnicp, int cmd);
+static void xsvnic_reclaim_tx_buffers(struct xsvnic *xsvnicp);
+static void handle_ring_size_change(struct xsvnic *xsvnicp);
+static void handle_rxbatch_change(struct xsvnic *xsvnicp);
+static int xsvnic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+static void xsvnic_update_oper_state(struct xsvnic *xsvnicp);
+static void xsvnic_update_tca_info(struct xsvnic *xsvnicp,
+ struct xsvnic_xsmp_msg *xmsgp,
+ int set_oper_down);
+char *xsvnic_get_rxbat_pkts(struct xsvnic *xsvnicp, int *curr_seg_len,
+ char *start, char *is_last_pkt, int total_pkt_len);
+
+static inline int xsvnic_esx_preregister_setup(struct net_device *netdev)
+{
+ return 0;
+}
+
+static inline int xsvnic_esx_postregister_setup(struct net_device *netdev)
+{
+ return 0;
+}
+
+static inline void vmk_notify_uplink(struct net_device *netdev)
+{
+}
+
+static inline void xsvnic_process_pages(struct xsvnic *xsvnicp,
+ struct xscore_buf_info *binfo)
+{
+ struct page *page;
+ struct sk_buff *skb;
+ int tot_pkt_len, hdr_len, curr_pkt_len, page_offset = 0;
+ char *start, *copy_start;
+ char nr_segs = 0, is_last_seg = 1;
+
+ tot_pkt_len = binfo->sz;
+ page = binfo->cookie;
+ start = page_address(page) + page_offset;
+
+ do {
+ curr_pkt_len = 0;
+ copy_start = xsvnic_get_rxbat_pkts(xsvnicp, &curr_pkt_len,
+ start, &is_last_seg,
+ tot_pkt_len);
+
+ hdr_len = min((int)(XSVNIC_MIN_PACKET_LEN), curr_pkt_len);
+ skb = dev_alloc_skb(hdr_len + NET_IP_ALIGN);
+ if (!skb) {
+ pr_err("XSVNIC: %s unable to allocate skb\n", __func__);
+ put_page(page);
+ break;
+ }
+ skb_reserve(skb, NET_IP_ALIGN);
+ memcpy(skb->data, copy_start, hdr_len);
+
+ skb_fill_page_desc(skb, 0, page,
+ page_offset + hdr_len + XS_RXBAT_HDRLEN,
+ curr_pkt_len - hdr_len);
+
+ skb->data_len = curr_pkt_len - hdr_len;
+ skb->len += curr_pkt_len;
+ skb->tail += hdr_len;
+
+ if (!is_last_seg) {
+ start = copy_start + curr_pkt_len;
+ page_offset += XS_RXBAT_HDRLEN + curr_pkt_len +
+ xsvnic_align_addr(&start);
+ get_page(page);
+ }
+
+ xsvnic_send_skb(xsvnicp, skb, curr_pkt_len, 0);
+ nr_segs++;
+ } while (!is_last_seg);
+
+ xsvnic_count_segs(xsvnicp, nr_segs, tot_pkt_len);
+}
+
+static inline void xsvnic_dev_kfree_skb_any(struct sk_buff *skb)
+{
+ if (skb != NULL)
+ dev_kfree_skb_any(skb);
+ else
+ pr_err("%s Error skb is null\n", __func__);
+}
+
+/*
+ * All XSMP related protocol messages
+ */
+
+static void xsvnic_put_ctx(struct xsvnic *xsvnicp)
+{
+ atomic_dec(&xsvnicp->ref_cnt);
+}
+
+static int xsvnic_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, void *data, int length)
+{
+ struct xsmp_message_header *m_header = data;
+ int ret;
+
+ m_header->length = cpu_to_be16(m_header->length);
+ ret = xcpm_send_message(xsmp_hndl, xsvnic_xsmp_service_id, data,
+ length);
+ if (ret)
+ xcpm_free_msg(data);
+ return ret;
+}
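+
+/*
+ * Note: the header length is converted to big endian just before the
+ * buffer is handed to xcpm_send_message(); on failure the buffer is
+ * released here with xcpm_free_msg(), so callers must not free it
+ * themselves.
+ */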
+
+static int xsvnic_xsmp_send_ack(xsmp_cookie_t xsmp_hndl,
+ struct xsvnic_xsmp_msg *xmsgp)
+{
+ void *msg;
+ struct xsmp_message_header *m_header;
+ int total_len = sizeof(*xmsgp) + sizeof(*m_header);
+
+ msg = xcpm_alloc_msg(total_len);
+ if (!msg)
+ return -ENOMEM;
+ m_header = (struct xsmp_message_header *)msg;
+ m_header->type = XSMP_MESSAGE_TYPE_VNIC;
+ m_header->length = total_len;
+
+ xmsgp->code = 0;
+
+ memcpy(msg + sizeof(*m_header), xmsgp, sizeof(*xmsgp));
+
+ return xsvnic_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+static int xsvnic_xsmp_send_nack(xsmp_cookie_t xsmp_hndl, void *data,
+ int length, u8 code)
+{
+ void *msg;
+ struct xsmp_message_header *m_header;
+ int total_len = length + sizeof(struct xsmp_message_header);
+ struct xsvnic_xsmp_msg *xsmsgp = (struct xsvnic_xsmp_msg *)data;
+
+ msg = xcpm_alloc_msg(total_len);
+ if (!msg)
+ return -ENOMEM;
+ m_header = (struct xsmp_message_header *)msg;
+ m_header->type = XSMP_MESSAGE_TYPE_VNIC;
+ m_header->length = total_len;
+
+ xsmsgp->code = XSMP_XSVNIC_NACK | code;
+ memcpy(msg + sizeof(*m_header), data, length);
+ return xsvnic_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+static int xsvnic_xsmp_send_notification(xsmp_cookie_t xsmp_hndl, u64 vid,
+ int notifycmd)
+{
+ int length = sizeof(struct xsmp_message_header) +
+ sizeof(struct xsvnic_xsmp_msg);
+ void *msg;
+ struct xsmp_message_header *header;
+ struct xsvnic_xsmp_msg *xsmp_msg;
+
+ msg = xcpm_alloc_msg(length);
+ if (!msg)
+ return -ENOMEM;
+
+ memset(msg, 0, length);
+
+ header = (struct xsmp_message_header *)msg;
+ xsmp_msg = (struct xsvnic_xsmp_msg *)(msg + sizeof(*header));
+
+ header->type = XSMP_MESSAGE_TYPE_VNIC;
+ header->length = length;
+
+ xsmp_msg->type = notifycmd;
+ xsmp_msg->length = cpu_to_be16(sizeof(*xsmp_msg));
+ xsmp_msg->resource_id = cpu_to_be64(vid);
+
+ return xsvnic_xsmp_send_msg(xsmp_hndl, msg, length);
+}
+
+static int xsvnic_xsmp_send_ha_state(struct xsvnic *xsvnicp, int ha_state)
+{
+ struct xsmp_message_header *header;
+ void *msg;
+ struct xsvnic_ha_info_msg *ha_info_msgp;
+ int length = sizeof(struct xsmp_message_header) +
+ sizeof(struct xsvnic_ha_info_msg);
+
+ msg = xcpm_alloc_msg(length);
+ if (!msg)
+ return -ENOMEM;
+
+ memset(msg, 0, length);
+ header = (struct xsmp_message_header *)msg;
+ header->type = XSMP_MESSAGE_TYPE_VNIC;
+ header->length = length;
+ ha_info_msgp = msg + sizeof(struct xsmp_message_header);
+ ha_info_msgp->type = XSMP_XSVNIC_HA_INFO;
+ ha_info_msgp->length = cpu_to_be16(sizeof(*ha_info_msgp));
+ ha_info_msgp->resource_id = cpu_to_be64(xsvnicp->resource_id);
+ ha_info_msgp->ha_state = ha_state;
+ return xsvnic_xsmp_send_msg(xsvnicp->xsmp_hndl, msg, length);
+}
+
+static int xsvnic_xsmp_send_oper_state(struct xsvnic *xsvnicp,
+ u64 vid, int state)
+{
+ int ret;
+ xsmp_cookie_t xsmp_hndl = xsvnicp->xsmp_hndl;
+ char *str = state == XSMP_XSVNIC_OPER_UP ? "UP" : "DOWN";
+
+ ret = xsvnic_xsmp_send_notification(xsmp_hndl, vid, state);
+ switch (state) {
+ case XSMP_XSVNIC_OPER_UP:
+ xsvnicp->counters[XSVNIC_SENT_OPER_UP_COUNTER]++;
+ break;
+ case XSMP_XSVNIC_OPER_DOWN:
+ xsvnicp->counters[XSVNIC_SENT_OPER_DOWN_COUNTER]++;
+ break;
+ }
+ if (ret) {
+ xsvnicp->counters[XSVNIC_SENT_OPER_STATE_FAILURE_COUNTER]++;
+ XSMP_INFO("%s:Oper %s notification failed for", __func__, str);
+ XSMP_INFO("resource_id: 0x%Lx\n", vid);
+ } else {
+ xsvnicp->counters[XSVNIC_SENT_OPER_STATE_SUCCESS_COUNTER]++;
+ XSMP_INFO("%s:Oper %s notification succeeded ", __func__, str);
+ XSMP_INFO("for resource_id: 0x%Lx\n", vid);
+ }
+
+ return ret;
+}
+
+/*
+ * Handle all IO path messaging here
+ * Called with mutex held
+ */
+static int xsvnic_send_start_stop(struct xsvnic *xsvnicp, int opcode)
+{
+ struct xsvnic_control_msg *header;
+ int len = sizeof(*header);
+ int ret;
+
+ if (xsvnicp->ctrl_conn.state != XSVNIC_CONN_CONNECTED)
+ return -ENOTCONN;
+ header = kmalloc(len, GFP_ATOMIC);
+ if (!header)
+ return -ENOMEM;
+
+ header->type = opcode;
+ /*
+ * Bug here where it needs to be swapped
+ */
+ header->length = sizeof(*header);
+ /*
+ * This is called with interrupts not disabled
+ */
+ ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, header, len, 0);
+ if (ret)
+ kfree(header);
+ if (opcode == XSVNIC_START_RX)
+ xsvnicp->counters[XSVNIC_START_RX_COUNTER]++;
+ else
+ xsvnicp->counters[XSVNIC_STOP_RX_COUNTER]++;
+ return ret;
+}
+
+static int xsvnic_send_vlan_list(struct xsvnic *xsvnicp, u16 *vlanp, int count,
+ int opcode)
+{
+ u8 *msg;
+ struct xsvnic_control_msg *header;
+ u16 *vp;
+ int len, i;
+ int ret;
+
+ if (xsvnicp->ctrl_conn.state != XSVNIC_CONN_CONNECTED)
+ return -ENOTCONN;
+ len = sizeof(*header) + (count * sizeof(u16));
+ msg = kmalloc(len, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+ vp = (u16 *) (msg + sizeof(*header));
+ for (i = 0; i < count; i++)
+ *vp++ = cpu_to_be16(*vlanp++);
+ header = (struct xsvnic_control_msg *)msg;
+ header->type = opcode;
+ header->length = cpu_to_be16(len);
+ ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, msg, len,
+ XSCORE_DEFER_PROCESS);
+ if (ret)
+ kfree(msg);
+ return ret;
+}
+
+static int xsvnic_send_allvlan_list(struct xsvnic *xsvnicp)
+{
+ int count = xsvnicp->vlan_count;
+ u16 *vlan_listp, *vp;
+ struct vlan_entry *vlan;
+ int ret;
+
+ if (count == 0)
+ return 0;
+
+ vlan_listp = kmalloc_array(count, sizeof(u16), GFP_ATOMIC);
+ if (!vlan_listp)
+ return -ENOMEM;
+ vp = vlan_listp;
+ list_for_each_entry(vlan, &xsvnicp->vlan_list, vlan_list)
+ *vp++ = vlan->vlan_id;
+ ret = xsvnic_send_vlan_list(xsvnicp, vlan_listp, count,
+ XSVNIC_ASSIGN_VLAN);
+ kfree(vlan_listp);
+ return ret;
+}
+
+/*
+ * Called with spin lock held
+ */
+
+static int xsvnic_send_multicast_list(struct xsvnic *xsvnicp, u8 *msg, int len,
+ int promisc)
+{
+ int ret;
+ struct xsvnic_control_msg *header;
+
+ header = (struct xsvnic_control_msg *)msg;
+ header->type = XSVNIC_MULTICAST_LIST_SEND;
+ /*
+ * This is a bug, needs swapping unfortunately the bug is in
+ * xvnd code and we need to carry the bug forward for backward
+ * compatibility
+ */
+ header->length = len;
+ header->data = promisc;
+ clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+ ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, msg, len,
+ XSCORE_DEFER_PROCESS);
+ if (ret) {
+ kfree(msg);
+ return ret;
+ } else
+ set_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+ return 0;
+}
+
+static void handle_port_link_change(struct xsvnic *xsvnicp, int linkup)
+{
+ if (linkup) {
+ set_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+ netif_carrier_on(xsvnicp->netdev);
+ netif_wake_queue(xsvnicp->netdev);
+ } else {
+ clear_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+ netif_carrier_off(xsvnicp->netdev);
+ netif_stop_queue(xsvnicp->netdev);
+ }
+}
+
+/* Indexed by enum xnic_bw; values are in Mbps */
+static int speed_arr[] = { 0, 100, 10, 200, 500, 800, 1000, 2000, 3000, 4000,
+ 5000, 6000, 7000, 8000, 9000, 10000
+};
+
+static int xsvnic_convert_speed(int sp)
+{
+ if (sp < 0 || sp >= (sizeof(speed_arr) / sizeof(int)))
+ return 1000;
+ return speed_arr[sp];
+}
+
+static void handle_vnic_control_msgs(struct work_struct *work)
+{
+ struct xsvnic_work *xwork = container_of(work, struct xsvnic_work,
+ work);
+ struct xsvnic *xsvnicp = xwork->xsvnicp;
+ struct xsvnic_control_msg *header =
+ (struct xsvnic_control_msg *)xwork->msg;
+ struct xsvnic_start_rx_resp_msg *resp;
+ struct xsvnic_link_up_msg *linkp;
+ unsigned long flags;
+
+ switch (header->type) {
+ case XSVNIC_START_RX_RESPONSE:
+ IOCTRL_INFO("VNIC: %s Start Rx Response\n", xsvnicp->vnic_name);
+ resp = (struct xsvnic_start_rx_resp_msg *)&header->data;
+ if (test_bit(XSVNIC_START_RX_SENT, &xsvnicp->state) &&
+ !test_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state)) {
+ xsvnicp->counters[XSVNIC_START_RX_RESP_COUNTER]++;
+ set_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+ xsvnicp->port_speed =
+ xsvnic_convert_speed(resp->port_speed);
+ xsvnicp->jiffies = jiffies;
+ pr_info("XSVNIC: %s Port Speed %d Mbps\n",
+ xsvnicp->vnic_name, xsvnicp->port_speed);
+ /*
+ * Alright port is UP now enable carrier state
+ */
+ if (test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state))
+ handle_port_link_change(xsvnicp, 1);
+ complete(&xsvnicp->done);
+ } else
+ xsvnicp->counters[XSVNIC_BAD_RX_RESP_COUNTER]++;
+ break;
+ case XSVNIC_LINK_UP:
+ if (!test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state)) {
+ linkp = (struct xsvnic_link_up_msg *)&header->data;
+ xsvnicp->port_speed =
+ xsvnic_convert_speed(linkp->port_speed);
+ handle_port_link_change(xsvnicp, 1);
+ xsvnicp->counters[XSVNIC_PORT_LINK_UP_COUNTER]++;
+ pr_info("XSVNIC: %s Link Up, speed: %d Mbps\n",
+ xsvnicp->vnic_name, xsvnicp->port_speed);
+ } else {
+ xsvnicp->counters[XSVNIC_DUP_PORT_LINK_UP_COUNTER]++;
+ IOCTRL_INFO("VNIC: %s Duplicate Link Up message\n",
+ xsvnicp->vnic_name);
+ }
+ break;
+ case XSVNIC_LINK_DOWN:
+ if (test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state)) {
+ handle_port_link_change(xsvnicp, 0);
+ xsvnicp->counters[XSVNIC_PORT_LINK_DOWN_COUNTER]++;
+ pr_info("XSVNIC: %s Link Down (Eth)\n",
+ xsvnicp->vnic_name);
+ } else {
+ xsvnicp->counters[XSVNIC_DUP_PORT_LINK_DOWN_COUNTER]++;
+ IOCTRL_INFO("VNIC: %s Duplicate Link Down message\n",
+ xsvnicp->vnic_name);
+ }
+ break;
+ case XSVNIC_MULTICAST_LIST_RESPONSE:
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+ xsvnicp->counters[XSVNIC_MCAST_LIST_RESP_COUNTER]++;
+ if (test_and_clear_bit(XSVNIC_MCAST_LIST_PENDING,
+ &xsvnicp->state))
+ _xsvnic_set_multicast(xsvnicp);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ break;
+ default:
+ IOCTRL_ERROR("VNIC: %s Unknown message type %d\n",
+ xsvnicp->vnic_name, header->type);
+ break;
+ }
+ kfree(xwork->msg);
+ kfree(xwork);
+ xsvnic_put_ctx(xsvnicp);
+}
+
+static void xsvnic_set_oper_down(struct xsvnic *xsvnicp, int lock)
+{
+ unsigned long flags = 0;
+
+ if (lock)
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (test_and_clear_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+ netif_carrier_off(xsvnicp->netdev);
+ netif_stop_queue(xsvnicp->netdev);
+ clear_bit(XSVNIC_START_RX_SENT, &xsvnicp->state);
+ clear_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+ clear_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+ clear_bit(XSVNIC_OPER_UP, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+ clear_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+ xsvnicp->ctrl_conn.state = XSVNIC_CONN_ERROR;
+ xsvnicp->data_conn.state = XSVNIC_CONN_ERROR;
+ xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+ XSMP_XSVNIC_OPER_DOWN);
+ xsvnicp->ha_state = XSVNIC_HA_STATE_UNKNOWN;
+ }
+ if (lock)
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static void xsvnic_ctrl_send_handler(void *client_arg, void *msg, int status,
+ int n)
+{
+ struct xsvnic *xsvnicp = client_arg;
+
+ IOCTRL_INFO("%s:Send Status %d, vnic: %s\n", __func__, status,
+ xsvnicp->vnic_name);
+ if (status) {
+ IOCTRL_ERROR("VNIC: %s Ctrl Send Completion error: %d\n",
+ xsvnicp->vnic_name, status);
+ xsvnicp->counters[XSVNIC_QP_ERROR_COUNTER]++;
+ xsvnic_set_oper_down(xsvnicp, 1);
+ }
+ kfree(msg);
+}
+
+/*
+ * Called from interrupt context
+ */
+static void xsvnic_ctrl_recv_handler(void *client_arg, void *msg, int sz,
+ int status, int n)
+{
+ struct xsvnic *xsvnicp = client_arg;
+ struct xsvnic_work *work;
+ unsigned long flags;
+
+ if (status) {
+ IOCTRL_ERROR("%s: Recv Completion error: status %d\n",
+ xsvnicp->vnic_name, status);
+ xsvnicp->counters[XSVNIC_CTRL_RECV_ERR_COUNTER]++;
+ xsvnic_set_oper_down(xsvnicp, 1);
+ kfree(msg);
+ return;
+ }
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work) {
+ kfree(msg);
+ return;
+ }
+ INIT_WORK(&work->work, handle_vnic_control_msgs);
+ work->xsvnicp = xsvnicp;
+ work->msg = msg;
+ work->len = sz;
+ work->status = status;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (!test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+ atomic_inc(&xsvnicp->ref_cnt);
+ queue_work(xsvnic_io_wq, &work->work);
+ } else {
+ kfree(msg);
+ kfree(work);
+ }
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+/*
+ * Data is pending, in interrupt context
+ */
+static void xsvnic_data_recv_handler(void *client_arg)
+{
+ struct xsvnic *xsvnicp = client_arg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) &&
+ test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) &&
+ test_bit(XSVNIC_OPER_UP, &xsvnicp->state) &&
+ !test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+ xsvnicp->counters[XSVNIC_NAPI_SCHED_COUNTER]++;
+ clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+ napi_schedule(&xsvnicp->napi);
+ } else
+ xsvnicp->counters[XSVNIC_NAPI_NOTSCHED_COUNTER]++;
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static inline void xsvnic_conn_disconnect(struct xsvnic *xsvnicp,
+ struct xsvnic_conn *conn)
+{
+ conn->state = XSVNIC_CONN_DISCONNECTED;
+ /*
+ * Whenever we call xscore_conn_disconnect,
+ * make sure there are no mutexes held
+ */
+ mutex_unlock(&xsvnicp->mutex);
+ xscore_conn_disconnect(&conn->ctx, 0);
+ mutex_lock(&xsvnicp->mutex);
+}
+
+static void xsvnic_io_disconnect(struct xsvnic *xsvnicp)
+{
+ xsvnic_set_oper_down(xsvnicp, 1);
+ if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state))
+ napi_synchronize(&xsvnicp->napi);
+ xsvnic_conn_disconnect(xsvnicp, &xsvnicp->ctrl_conn);
+ xsvnic_conn_disconnect(xsvnicp, &xsvnicp->data_conn);
+ if (test_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state))
+ handle_ring_size_change(xsvnicp);
+ if (test_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state))
+ handle_rxbatch_change(xsvnicp);
+}
+
+static int xsvnic_send_data_hbeat(struct xsvnic *xsvnicp)
+{
+ struct sk_buff *skb;
+ struct arphdr *arp;
+ unsigned char *arp_ptr, *eth_ptr;
+ int ret;
+
+ skb = alloc_skb(XSVNIC_MIN_PACKET_LEN, GFP_ATOMIC);
+ if (skb == NULL)
+ return -ENOMEM;
+
+ eth_ptr = (unsigned char *)skb_put(skb, XSVNIC_MIN_PACKET_LEN);
+ ether_addr_copy(eth_ptr, xsvnicp->netdev->dev_addr);
+ eth_ptr += ETH_ALEN;
+ ether_addr_copy(eth_ptr, xsvnicp->netdev->dev_addr);
+ eth_ptr += ETH_ALEN;
+ *eth_ptr++ = (ETH_P_RARP >> 8) & 0xff;
+ *eth_ptr++ = ETH_P_RARP & 0xff;
+
+ arp = (struct arphdr *)eth_ptr;
+ arp->ar_hrd = htons(xsvnicp->netdev->type);
+ arp->ar_hln = xsvnicp->netdev->addr_len;
+ arp->ar_pln = 4;
+ arp->ar_op = htons(ARPOP_RREPLY);
+
+ arp_ptr = (unsigned char *)(arp + 1);
+
+ ether_addr_copy(arp_ptr, xsvnicp->netdev->dev_addr);
+ arp_ptr += xsvnicp->netdev->addr_len;
+ arp_ptr += 4;
+ ether_addr_copy(arp_ptr, xsvnicp->netdev->dev_addr);
+
+ skb_reset_network_header(skb);
+ skb->dev = xsvnicp->netdev;
+ skb->protocol = htons(ETH_P_RARP);
+
+ ret = xsvnic_start_xmit(skb, xsvnicp->netdev);
+ if (ret)
+ dev_kfree_skb_any(skb);
+
+ return 0;
+}
+
+static int xsvnic_send_ctrl_hbeat(struct xsvnic *xsvnicp)
+{
+ struct xsmp_message_header *header;
+ int ret;
+
+ header = kmalloc(sizeof(*header), GFP_ATOMIC);
+ if (!header)
+ return -ENOMEM;
+ header->type = XSVNIC_HEART_BEAT;
+ header->length = sizeof(*header);
+ ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, header,
+ sizeof(*header), 0);
+ if (ret)
+ kfree(header);
+ return ret;
+}
+
+/*
+ * Send heartbeat over control channel or data channel
+ */
+static int xsvnic_send_hbeat(struct xsvnic *xsvnicp)
+{
+ int ret = 0;
+
+ if (!xsvnic_hbeat_enable)
+ return 0;
+ if (xsvnic_hbeat_enable == 1) {
+ ret = xsvnic_send_ctrl_hbeat(xsvnicp);
+ xsvnicp->counters[XSVNIC_CTRL_HBEAT_COUNTER]++;
+ } else {
+ xsvnic_send_data_hbeat(xsvnicp);
+ xsvnicp->counters[XSVNIC_DATA_HBEAT_COUNTER]++;
+ }
+ return ret;
+}
+
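+/*
+ * HA state machine: for non-HA vnics the state is simply forced to
+ * ACTIVE.  For HA (primary/secondary) vnics, derive ACTIVE/STANDBY from
+ * the XSVNIC_STATE_STDBY bit and report any transition to the chassis
+ * via xsvnic_xsmp_send_ha_state().
+ */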
+static void handle_ha_sm(struct xsvnic *xsvnicp)
+{
+ if ((xsvnicp->mp_flag & (MP_XSVNIC_PRIMARY |
+ MP_XSVNIC_SECONDARY)) == 0) {
+ xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+ return;
+ }
+ /*
+ * Check HA state and send update if things have changed
+ */
+ if (xsvnicp->ha_state == XSVNIC_HA_STATE_UNKNOWN) {
+ xsvnicp->ha_state = test_bit(XSVNIC_STATE_STDBY,
+ &xsvnicp->state)
+ ? XSVNIC_HA_STATE_STANDBY : XSVNIC_HA_STATE_ACTIVE;
+ xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+ } else if (xsvnicp->ha_state == XSVNIC_HA_STATE_ACTIVE &&
+ (test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+ xsvnicp->ha_state = XSVNIC_HA_STATE_STANDBY;
+ xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+ } else if (xsvnicp->ha_state == XSVNIC_HA_STATE_STANDBY &&
+ (!test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+ xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+ xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+ }
+}
+
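+/*
+ * Heartbeat state machine: send_hbeat_flag is cleared by the transmit
+ * path whenever real traffic goes out.  If it is still set when the
+ * state machine runs, no packet was sent since the last tick, so reclaim
+ * TX buffers and (when ACTIVE) emit a heartbeat; a heartbeat send
+ * failure takes the interface operationally down.
+ */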
+static void handle_hbeat_sm(struct xsvnic *xsvnicp)
+{
+ unsigned long flags;
+ /*
+ * Send heartbeat if send_hbeat_flag is set
+ */
+ if (xsvnicp->send_hbeat_flag) {
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ xsvnic_reclaim_tx_buffers(xsvnicp);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ if (xsvnicp->ha_state == XSVNIC_HA_STATE_ACTIVE
+ && xsvnic_send_hbeat(xsvnicp)) {
+ xsvnicp->counters[XSVNIC_HBEAT_ERR_COUNTER]++;
+ xsvnic_set_oper_down(xsvnicp, 1);
+ }
+ }
+ xsvnicp->send_hbeat_flag = 1;
+}
+
+static void handle_ring_size_change(struct xsvnic *xsvnicp)
+{
+ int ret;
+
+ clear_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state);
+ /*
+ * Now destroy ctx
+ */
+ xscore_conn_destroy(&xsvnicp->data_conn.ctx);
+ xsvnicp->data_conn.ctx.rx_ring_size = xsvnicp->rx_ring_size;
+ xsvnicp->data_conn.ctx.tx_ring_size = xsvnicp->tx_ring_size;
+
+ ret = xscore_conn_init(&xsvnicp->data_conn.ctx,
+ xsvnicp->xsmp_info.port);
+ if (ret)
+ DRV_ERROR("xscore_conn_init data error for VNIC %s, ret = %d\n",
+ xsvnicp->vnic_name, ret);
+}
+
+static void handle_multicast(struct xsvnic *xsvnicp)
+{
+ unsigned long flags;
+
+ if (test_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state)) {
+ if (test_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state)) {
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ xsvnicp->counters[XSVNIC_MCAST_LIST_NORESP_COUNTER]++;
+ clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+ if (test_and_clear_bit
+ (XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state))
+ _xsvnic_set_multicast(xsvnicp);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ } else
+ set_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+ }
+}
+
+static void handle_action_flags(struct xsvnic *xsvnicp)
+{
+ if (test_bit(XSVNIC_TRIGGER_NAPI_SCHED, &xsvnicp->state)) {
+ xsvnic_data_recv_handler(xsvnicp);
+ clear_bit(XSVNIC_TRIGGER_NAPI_SCHED, &xsvnicp->state);
+ }
+}
+
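+/*
+ * Both control and data connections are up: mark the vnic OPER_UP,
+ * report the new state to the chassis, push the current multicast and
+ * VLAN lists, then send START_RX and wait (up to 5 seconds) for the
+ * response before scheduling NAPI.
+ */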
+static void handle_post_conn_setup(struct xsvnic *xsvnicp)
+{
+ int ret;
+ unsigned long flags;
+
+ xsvnicp->counters[XSVNIC_IB_RECOVERED_COUNTER]++;
+ xsvnicp->send_hbeat_flag = 0;
+ set_bit(XSVNIC_OPER_UP, &xsvnicp->state);
+ xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+ XSMP_XSVNIC_OPER_UP);
+ /*
+ * Now send multicast list & vlan list
+ */
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ _xsvnic_set_multicast(xsvnicp);
+ xsvnic_send_allvlan_list(xsvnicp);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ init_completion(&xsvnicp->done);
+ set_bit(XSVNIC_START_RX_SENT, &xsvnicp->state);
+ clear_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+ ret = xsvnic_send_start_stop(xsvnicp, XSVNIC_START_RX);
+ if (ret || !wait_for_completion_timeout(&xsvnicp->done,
+ msecs_to_jiffies(1000 * 5))) {
+ IOCTRL_ERROR("%s: start send failed ", xsvnicp->vnic_name);
+ IOCTRL_ERROR("%d or did not get rx start resp\n", ret);
+ xsvnic_set_oper_down(xsvnicp, 1);
+ } else {
+ napi_schedule(&xsvnicp->napi);
+ if (xsvnicp->mp_flag &
+ (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY))
+ xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+ }
+}
+
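+/*
+ * Connection state machine: bring up the control connection first; only
+ * once it is CONNECTED is the data connection attempted.  When both are
+ * CONNECTED, handle_post_conn_setup() finishes bringing the vnic up.
+ * An error on either connection tears both down via
+ * xsvnic_io_disconnect().
+ */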
+static void xsvnic_conn_state_machine(struct xsvnic *xsvnicp)
+{
+ struct xsvnic_conn *cconn = &xsvnicp->ctrl_conn;
+ struct xsvnic_conn *dconn = &xsvnicp->data_conn;
+ int ret;
+
+ switch (cconn->state) {
+ case XSVNIC_CONN_ERROR:
+ xsvnic_io_disconnect(xsvnicp);
+ break;
+ case XSVNIC_CONN_DISCONNECTED:
+ case XSVNIC_CONN_INIT:
+ xsvnicp->counters[XSVNIC_IB_RECOVERY_COUNTER]++;
+ set_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+ clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+ clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+ cconn->state = XSVNIC_CONN_CONNECTING;
+ ret = xscore_conn_connect(&cconn->ctx, 0);
+ if (ret)
+ cconn->state = XSVNIC_CONN_ERROR;
+ break;
+ case XSVNIC_CONN_CONNECTED:
+ switch (dconn->state) {
+ case XSVNIC_CONN_ERROR:
+ xsvnic_io_disconnect(xsvnicp);
+ break;
+ case XSVNIC_CONN_DISCONNECTED:
+ case XSVNIC_CONN_INIT:
+ dconn->state = XSVNIC_CONN_CONNECTING;
+ ret = xscore_conn_connect(&dconn->ctx, 0);
+ if (ret) {
+ dconn->state = XSVNIC_CONN_ERROR;
+ cconn->state = XSVNIC_CONN_ERROR;
+ }
+ break;
+ case XSVNIC_CONN_CONNECTED:
+ handle_post_conn_setup(xsvnicp);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+/*
+ * This function can get called from workqueue/thread context
+ */
+static int xsvnic_state_machine(struct xsvnic *xsvnicp)
+{
+ if (!test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) ||
+ !test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) ||
+ test_bit(XSVNIC_XT_DOWN, &xsvnicp->state) ||
+ test_bit(XSVNIC_IBLINK_DOWN, &xsvnicp->state) ||
+ test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+ xsvnic_io_disconnect(xsvnicp);
+ if (test_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state)) {
+ clear_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+ xsvnic_xsmp_send_notification(xsvnicp->xsmp_hndl,
+ xsvnicp->resource_id,
+ XSMP_XSVNIC_UPDATE);
+ }
+ if (test_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP, &xsvnicp->state))
+ set_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+ else
+ clear_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+ xsvnicp->sm_delay = 2000;
+ handle_ha_sm(xsvnicp);
+ return 0;
+ }
+ /*
+ * If it is operationally up, we are done with it
+ */
+ if (test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+ xsvnicp->counters[XSVNIC_OPER_UP_STATE_COUNTER]++;
+ handle_hbeat_sm(xsvnicp);
+ handle_ha_sm(xsvnicp);
+ handle_multicast(xsvnicp);
+ handle_action_flags(xsvnicp);
+ if (test_bit(XSVNIC_RX_NOBUF, &xsvnicp->state)) {
+ if (!xscore_refill_recv
+ (&xsvnicp->data_conn.ctx, GFP_KERNEL))
+ clear_bit(XSVNIC_RX_NOBUF, &xsvnicp->state);
+ else
+ xsvnicp->counters[XSVNIC_RX_NOBUF_COUNTER]++;
+ }
+ xsvnicp->sm_delay = 2000;
+ return 0;
+ }
+ xsvnic_conn_state_machine(xsvnicp);
+ xsvnicp->sm_delay = 1000;
+ return 0;
+}
+
+static void xsvnic_state_machine_work(struct work_struct *work)
+{
+ struct xsvnic *xsvnicp = container_of(work, struct xsvnic,
+ sm_work.work);
+
+ mutex_lock(&xsvnicp->mutex);
+ xsvnic_state_machine(xsvnicp);
+ mutex_unlock(&xsvnicp->mutex);
+ queue_sm_work(xsvnicp, xsvnicp->sm_delay);
+}
+
+static void queue_sm_work(struct xsvnic *xsvnicp, int msecs)
+{
+ unsigned long flags;
+ int del = 0;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (!test_bit(XSVNIC_DELETING, &xsvnicp->state))
+ queue_delayed_work(xsvnic_wq, &xsvnicp->sm_work,
+ msecs_to_jiffies(msecs));
+ else
+ del = 1;
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ if (del)
+ xsvnic_remove_vnic(xsvnicp);
+}
+
+static void xsvnic_ctrl_event_handler(void *client_arg, int event)
+{
+ struct xsvnic *xsvnicp = client_arg;
+
+ mutex_lock(&xsvnicp->mutex);
+ switch (event) {
+ case XSCORE_CONN_CONNECTED:
+ xsvnicp->counters[XSVNIC_CTRL_CONN_OK_COUNTER]++;
+ xsvnicp->ctrl_conn.state = XSVNIC_CONN_CONNECTED;
+ break;
+ case XSCORE_CONN_ERR:
+ xsvnicp->counters[XSVNIC_CTRL_ERR_COUNTER]++;
+ xsvnicp->ctrl_conn.state = XSVNIC_CONN_ERROR;
+ break;
+ case XSCORE_CONN_RDISCONNECTED:
+ xsvnicp->counters[XSVNIC_CTRL_RDISC_COUNTER]++;
+ xsvnicp->ctrl_conn.state = XSVNIC_CONN_DISCONNECTED;
+ xsvnic_set_oper_down(xsvnicp, 1);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&xsvnicp->mutex);
+}
+
+static void xsvnic_data_event_handler(void *client_arg, int event)
+{
+ struct xsvnic *xsvnicp = client_arg;
+
+ mutex_lock(&xsvnicp->mutex);
+ switch (event) {
+ case XSCORE_CONN_CONNECTED:
+ xsvnicp->counters[XSVNIC_DATA_CONN_OK_COUNTER]++;
+ xsvnicp->data_conn.state = XSVNIC_CONN_CONNECTED;
+ break;
+ case XSCORE_CONN_ERR:
+ xsvnicp->counters[XSVNIC_DATA_ERR_COUNTER]++;
+ xsvnicp->data_conn.state = XSVNIC_CONN_ERROR;
+ break;
+ case XSCORE_CONN_RDISCONNECTED:
+ xsvnicp->counters[XSVNIC_DATA_RDISC_COUNTER]++;
+ xsvnicp->data_conn.state = XSVNIC_CONN_DISCONNECTED;
+ xsvnic_set_oper_down(xsvnicp, 1);
+ break;
+ default:
+ break;
+ }
+ mutex_unlock(&xsvnicp->mutex);
+}
+
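+/*
+ * Allocate a physically contiguous receive buffer, rounding the request
+ * up to a power-of-two number of pages.  The rounded size and page order
+ * are returned to the caller; multi-page allocations use __GFP_COMP so
+ * the buffer can be treated as a compound page.
+ */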
+static struct page *xsvnic_alloc_pages(int *size, int *page_order)
+{
+ gfp_t alloc_flags = GFP_ATOMIC;
+ u16 order = get_order(*size);
+ int chan_size = (1 << get_order(*size)) * PAGE_SIZE;
+
+ *size = chan_size;
+ *page_order = order;
+
+ if (order > 0)
+ alloc_flags |= __GFP_COMP;
+
+ return alloc_pages(alloc_flags, order);
+}
+
+static u8 *xsvnic_skb_alloc(void *client_arg, void **cookie, int len)
+{
+ struct xsvnic *xsvnicp = client_arg;
+ struct sk_buff *skb;
+
+ skb = dev_alloc_skb(len);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, NET_IP_ALIGN);
+ skb->dev = xsvnicp->netdev;
+ *cookie = skb;
+ xsvnicp->counters[XSVNIC_RX_SKB_ALLOC_COUNTER]++;
+ return skb->data;
+}
+
+static struct page *xsvnic_page_alloc(void *client_arg, void **cookie,
+ int *rsize, int element)
+{
+ struct xsvnic *xsvnicp = client_arg;
+ struct page *page = xsvnic_alloc_pages(rsize, &xsvnicp->page_order);
+
+ if (!page) {
+ pr_info("XSVNIC: Unable to allocate page size %d\n", *rsize);
+ return NULL;
+ }
+
+ xsvnicp->counters[XSVNIC_RX_SKB_ALLOC_COUNTER]++;
+ *cookie = page;
+
+ return page;
+}
+
+static void xsvnic_page_free(void *client_arg, void *cookie, int dir)
+{
+ struct sk_buff *skb = NULL;
+ struct page *page = NULL;
+ struct xsvnic *xsvnicp = client_arg;
+
+ if (dir == XSCORE_SEND_BUF) {
+ skb = cookie;
+ xsvnic_dev_kfree_skb_any(skb);
+ xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER]++;
+ } else {
+ xsvnicp->counters[XSVNIC_RX_SKB_FREE_COUNTER]++;
+ page = cookie;
+ put_page(page);
+ }
+}
+
+static void xsvnic_skb_free(void *client_arg, void *cookie, int dir)
+{
+ struct sk_buff *skb = cookie;
+ struct xsvnic *xsvnicp = client_arg;
+
+ xsvnic_dev_kfree_skb_any(skb);
+ if (dir == XSCORE_SEND_BUF)
+ xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER]++;
+ else
+ xsvnicp->counters[XSVNIC_RX_SKB_FREE_COUNTER]++;
+}
+
+static inline void xsvnic_process_rbuf_error(struct xsvnic *xsvnicp,
+ struct xscore_buf_info *binfo)
+{
+ struct page *page;
+ struct sk_buff *skb;
+
+ if (xsvnicp->is_rxbatching) {
+ page = binfo->cookie;
+ put_page(page);
+ } else {
+ skb = binfo->cookie;
+ xsvnic_dev_kfree_skb_any(skb);
+ }
+}
+
+static u8 *xsvnic_ctrl_alloc(void *client_arg, void **cookie, int sz)
+{
+ return kmalloc(sz, GFP_ATOMIC);
+}
+
+static void xsvnic_ctrl_free(void *client_arg, void *cookie, int dir)
+{
+ kfree(cookie);
+}
+
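+/*
+ * Pick the receive buffer strategy for the data connection: with RX
+ * batching enabled, hand out two-page buffers via the page allocator
+ * callbacks; otherwise allocate one skb per packet, sized to the MTU
+ * plus Ethernet and alignment overhead.
+ */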
+static void xsvnic_buf_init(struct xsvnic *xsvnicp,
+ struct xscore_conn_ctx *cctx)
+{
+ if (xsvnicp->is_rxbatching) {
+ cctx->rx_buf_size = (PAGE_SIZE * 2);
+ cctx->alloc_page_bufs = xsvnic_page_alloc;
+ cctx->alloc_buf = 0;
+ cctx->free_buf = xsvnic_page_free;
+ } else {
+ cctx->rx_buf_size = xsvnicp->mtu + NET_IP_ALIGN + ETH_HLEN + 12;
+ cctx->alloc_page_bufs = 0;
+ cctx->alloc_buf = xsvnic_skb_alloc;
+ cctx->free_buf = xsvnic_skb_free;
+ }
+}
+
+int check_rxbatch_possible(struct xsvnic *xsvnicp, int flag)
+{
+ if (flag && (xsvnicp->install_flag & XSVNIC_INSTALL_RX_BAT)
+ && (xsvnicp->install_flag & XSVNIC_8K_IBMTU)
+ && (xsvnicp->mtu <= (PAGE_SIZE * 2)) && xsvnicp->xsmp_info.is_shca)
+ return 1;
+ else
+ return 0;
+}
+
+static void handle_rxbatch_change(struct xsvnic *xsvnicp)
+{
+ int ret;
+ struct xscore_conn_ctx *ctx = &xsvnicp->data_conn.ctx;
+ struct xt_cm_private_data *cmp =
+ (struct xt_cm_private_data *)ctx->priv_data;
+
+ clear_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state);
+ xscore_conn_destroy(ctx);
+
+ /*
+ * Change rx batching settings
+ */
+ xsvnicp->is_rxbatching = xsvnicp->is_rxbat_operational;
+ xsvnic_buf_init(xsvnicp, ctx);
+
+ if (xsvnicp->is_rxbatching) {
+ cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_BIT);
+ cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT);
+ } else {
+ cmp->data_qp_type &= ~(cpu_to_be32(XSVNIC_RXBAT_BIT));
+ cmp->data_qp_type &= ~(cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT));
+ }
+
+ ret = xscore_conn_init(ctx, xsvnicp->xsmp_info.port);
+ if (ret)
+ DRV_ERROR("xscore_conn_init data error for VNIC %s, ret = %d\n",
+ xsvnicp->vnic_name, ret);
+}
+
+static int xsvnic_conn_init(struct xsvnic *xsvnicp)
+{
+ struct xsvnic_conn *cp;
+ struct xscore_conn_ctx *cctx;
+ struct xt_cm_private_data *cmp;
+ int ret;
+
+ cp = &xsvnicp->ctrl_conn;
+ cctx = &cp->ctx;
+ /*
+ * Control connection
+ */
+ cp->type = XSVNIC_IO_QP_TYPE_CONTROL;
+ cctx->tx_ring_size = 4;
+ cctx->rx_ring_size = 4;
+ cctx->rx_buf_size = XSVNIC_MAX_BUF_SIZE;
+ cctx->client_arg = xsvnicp;
+ cctx->alloc_buf = xsvnic_ctrl_alloc;
+ cctx->free_buf = xsvnic_ctrl_free;
+ cctx->send_compl_handler = xsvnic_ctrl_send_handler;
+ cctx->recv_msg_handler = xsvnic_ctrl_recv_handler;
+ cctx->event_handler = xsvnic_ctrl_event_handler;
+ cctx->dguid = xsvnicp->tca_guid;
+ cctx->dlid = xsvnicp->tca_lid;
+ cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+
+ cmp = (struct xt_cm_private_data *)cctx->priv_data;
+ cmp->vid = cpu_to_be64(xsvnicp->resource_id);
+ cmp->qp_type = cpu_to_be16(XSVNIC_IO_QP_TYPE_CONTROL);
+
+ cctx->priv_data_len = sizeof(*cmp);
+
+ ret = xscore_conn_init(cctx, xsvnicp->xsmp_info.port);
+ if (ret) {
+ DRV_ERROR("xscore_conn_init ctrl error for VID %llx %d\n",
+ xsvnicp->resource_id, ret);
+ return ret;
+ }
+
+ cp = &xsvnicp->data_conn;
+ cctx = &cp->ctx;
+
+ cp->type = XSVNIC_IO_QP_TYPE_DATA;
+ cctx->tx_ring_size = xsvnicp->tx_ring_size;
+ cctx->rx_ring_size = xsvnicp->rx_ring_size;
+ cctx->client_arg = xsvnicp;
+
+ /*
+ * 8K IB MTU is for softhca only
+ */
+ if (xsvnicp->install_flag & XSVNIC_8K_IBMTU
+ && xsvnicp->xsmp_info.is_shca)
+ cctx->features |= XSCORE_8K_IBMTU_SUPPORT;
+
+ if (check_rxbatch_possible(xsvnicp, xsvnic_rxbatching))
+ xsvnicp->is_rxbatching = 1;
+
+ xsvnic_buf_init(xsvnicp, cctx);
+
+ cctx->send_compl_handler = 0;
+ cctx->recv_compl_handler = xsvnic_data_recv_handler;
+ cctx->event_handler = xsvnic_data_event_handler;
+ cctx->dguid = xsvnicp->tca_guid;
+ cctx->dlid = xsvnicp->tca_lid;
+ cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+ cctx->features |= XSCORE_SG_SUPPORT;
+ if (!xsvnic_tx_intr_mode) {
+ cctx->features |= XSCORE_NO_SEND_COMPL_INTR;
+ } else {
+ cctx->tx_max_coalesced_frames = xsvnic_max_coal_frames;
+ cctx->tx_coalesce_usecs = xsvnic_coal_usecs;
+ }
+
+ if (!xsvnic_rx_intr_mode) {
+ cctx->features |= XSCORE_NO_RECV_COMPL_INTR;
+ } else {
+ cctx->rx_max_coalesced_frames = xsvnic_max_coal_frames;
+ cctx->rx_coalesce_usecs = xsvnic_coal_usecs;
+ }
+
+ cmp = (struct xt_cm_private_data *)cctx->priv_data;
+ cmp->vid = cpu_to_be64(xsvnicp->resource_id);
+ cmp->qp_type = cpu_to_be16(XSVNIC_IO_QP_TYPE_DATA);
+
+ if (xsvnicp->is_tso && (xsvnicp->netdev->features & NETIF_F_TSO))
+ cmp->data_qp_type |= cpu_to_be32(XSVNIC_TSO_BIT);
+
+ if (xsvnicp->is_rxbatching) {
+ cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_BIT);
+ cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT);
+ }
+
+ cctx->priv_data_len = sizeof(*cmp);
+
+ ret = xscore_conn_init(cctx, xsvnicp->xsmp_info.port);
+ if (ret) {
+ DRV_ERROR("xscore_conn_init data error for VID %llx %d\n",
+ xsvnicp->resource_id, ret);
+ xscore_conn_destroy(&xsvnicp->ctrl_conn.ctx);
+ }
+ return ret;
+}
+
+/*
+ * All the functions related to the stack
+ */
+
+static void xsvnic_setup(struct net_device *netdev)
+{
+ ether_setup(netdev);
+}
+
+static int xsvnic_open(struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+ xsvnicp->counters[XSVNIC_OPEN_COUNTER]++;
+ mutex_lock(&xsvnicp->mutex);
+ napi_enable(&xsvnicp->napi);
+ set_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state);
+ mutex_unlock(&xsvnicp->mutex);
+ return 0;
+}
+
+static int xsvnic_stop(struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ unsigned long flags;
+
+#ifdef __VMKLNX__
+ /* set trans_start so we don't get spurious watchdogs during reset */
+ netdev->trans_start = jiffies;
+#endif
+
+ xsvnicp->counters[XSVNIC_STOP_COUNTER]++;
+ mutex_lock(&xsvnicp->mutex);
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ clear_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ xsvnic_io_disconnect(xsvnicp);
+ napi_disable(&xsvnicp->napi);
+ mutex_unlock(&xsvnicp->mutex);
+ return 0;
+}
+
+static struct net_device_stats *xsvnic_get_stats(struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+ xsvnicp->counters[XSVNIC_GETSTATS_COUNTER]++;
+ return &xsvnicp->stats;
+}
+
+static void xsvnic_tx_timeout(struct net_device *dev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(dev);
+
+ xsvnicp->counters[XSVNIC_WDOG_TIMEOUT_COUNTER]++;
+ xsvnic_set_oper_down(xsvnicp, 1);
+}
+
+static int xsvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return 0;
+}
+
+static int xsvnic_set_mac_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+ struct xsvnic *xsvnicp = netdev_priv(dev);
+
+ if (!is_valid_ether_addr((u8 *) (addr->sa_data)))
+ return -EINVAL;
+
+ if (memcmp(dev->dev_addr, addr->sa_data, dev->addr_len) != 0) {
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ xsvnicp->counters[XSVNIC_MAC_ADDR_CHNG]++;
+ }
+
+ return 0;
+}
+
+/*
+ * Copy all the multicast addresses from the net_device into the
+ * xsvnic's private multicast list
+ */
+static int xsvnic_mc_list_copy(struct xsvnic *xsvnicp)
+{
+ struct net_device *netdev = xsvnicp->netdev;
+
+ if (xsvnicp->mc_addrs != NULL)
+ kfree(xsvnicp->mc_addrs);
+
+ xsvnicp->mc_addrs = kmalloc(netdev_mc_count(netdev) *
+ sizeof(struct ether_addr), GFP_ATOMIC);
+
+ if (!xsvnicp->mc_addrs)
+ return -ENOMEM;
+ xsvnicp->mc_count = netdev_mc_count(netdev);
+ netdev_mc_list_copy(xsvnicp);
+ return 0;
+}
+
+static void _xsvnic_set_multicast(struct xsvnic *xsvnicp)
+{
+ int count = xsvnicp->mc_count;
+ int i;
+ u8 *msg, *pay;
+ int tlen;
+
+ if (multicast_list_disable || xsvnicp->ctrl_conn.state
+ != XSVNIC_CONN_CONNECTED)
+ return;
+
+ if (test_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state)) {
+ /*
+ * Once response comes back for sent list, this will trigger
+ * another send operation
+ */
+ set_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+ return;
+ }
+
+ xsvnicp->counters[XSVNIC_SET_MCAST_COUNTER]++;
+ /*
+ * Copy over the multicast list and send it over
+ */
+ xsvnicp->iff_promisc = 0;
+ if ((xsvnicp->netdev->flags & (IFF_ALLMULTI | IFF_PROMISC))
+ || count > XSVNIC_MACLIST_MAX)
+ xsvnicp->iff_promisc = 1;
+ if (count > XSVNIC_MACLIST_MAX)
+ count = XSVNIC_MACLIST_MAX;
+ tlen = ETH_ALEN * count + sizeof(struct xsvnic_control_msg);
+ msg = kmalloc(tlen, GFP_ATOMIC);
+ if (!msg)
+ return;
+ pay = msg + sizeof(struct xsvnic_control_msg);
+ for (i = 0; i < count; i++) {
+ ether_addr_copy(pay, (u8 *)&(xsvnicp->mc_addrs[i]));
+ pay += ETH_ALEN;
+ }
+ xsvnic_send_multicast_list(xsvnicp, msg, tlen, xsvnicp->iff_promisc);
+}
+
+static void xsvnic_set_multicast(struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ xsvnic_mc_list_copy(xsvnicp);
+ _xsvnic_set_multicast(xsvnicp);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static int xsvnic_vlan_rx_add_vlanid(struct net_device *netdev, __be16 proto,
+ u16 vlanid)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ struct vlan_entry *vlan;
+ unsigned long flags;
+
+ xsvnicp->counters[XSVNIC_VLAN_RX_ADD_COUNTER]++;
+ /*
+ * The control message to the IOP can accommodate a size of 1024;
+ * we restrict the number of VLANs to 500. Ideally this limit is not
+ * needed; it is kept for legacy reasons.
+ */
+ if (xsvnicp->vlan_count >= XSVNIC_VLANLIST_MAX)
+ return -1;
+ vlan = kmalloc(sizeof(struct vlan_entry), GFP_ATOMIC);
+ if (!vlan)
+ return -1;
+ INIT_LIST_HEAD(&vlan->vlan_list);
+ vlan->vlan_id = vlanid;
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ list_add_tail(&vlan->vlan_list, &xsvnicp->vlan_list);
+ xsvnicp->vlan_count++;
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ xsvnic_send_vlan_list(xsvnicp, &vlanid, 1, XSVNIC_ASSIGN_VLAN);
+ return 0;
+}
+
+static int xsvnic_vlan_rx_kill_vlanid(struct net_device *netdev, __be16 proto,
+ u16 vlanid)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ struct vlan_entry *vlan;
+ unsigned long flags;
+
+ xsvnicp->counters[XSVNIC_VLAN_RX_DEL_COUNTER]++;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ list_for_each_entry(vlan, &xsvnicp->vlan_list, vlan_list) {
+ if (vlan->vlan_id == vlanid) {
+ list_del(&vlan->vlan_list);
+ kfree(vlan);
+ xsvnicp->vlan_count--;
+ xsvnic_send_vlan_list(xsvnicp, &vlanid, 1,
+ XSVNIC_UNASSIGN_VLAN);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ return 0;
+}
+
+int xsvnic_change_rxbatch(struct xsvnic *xsvnicp, int flag)
+{
+ if (xsvnicp->is_rxbatching != flag) {
+ if (flag && !check_rxbatch_possible(xsvnicp, flag))
+ return -EINVAL;
+
+ set_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state);
+ xsvnic_set_oper_down(xsvnicp, 1);
+ xsvnicp->is_rxbat_operational = flag;
+ }
+
+ return 1;
+}
+/*
+static int xsvnic_get_settings(struct net_device *netdev,
+ struct ethtool_cmd *ecmd)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+ ecmd->autoneg = 0;
+ ecmd->speed = SPEED_1000;
+ ecmd->duplex = DUPLEX_FULL;
+ if (netif_carrier_ok(netdev)) {
+ if ((xsvnicp->port_speed == SPEED_1000) && xsvnic_report_10gbps)
+ ecmd->speed = SPEED_10000;
+ else
+ ecmd->speed = xsvnicp->port_speed;
+
+ if (ecmd->speed > SPEED_1000) {
+ ecmd->advertising = ADVERTISED_10000baseT_Full;
+ ecmd->supported = SUPPORTED_10000baseT_Full |
+ SUPPORTED_FIBRE | SUPPORTED_Autoneg;
+ ecmd->port = PORT_FIBRE;
+ ecmd->transceiver = XCVR_EXTERNAL;
+ } else {
+ ecmd->advertising = ADVERTISED_1000baseT_Full |
+ ADVERTISED_100baseT_Full;
+ ecmd->supported =
+ SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half |
+ SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half |
+ SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half
+ | SUPPORTED_TP | SUPPORTED_Autoneg;
+ ecmd->transceiver = XCVR_INTERNAL;
+ ecmd->port = PORT_TP;
+ }
+ }
+ return 0;
+}
+*/
+/*
+static int xsvnic_set_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ering)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+ if (ering->rx_pending >= 32
+ && ering->rx_pending <= ering->rx_max_pending)
+ xsvnicp->rx_ring_size = ering->rx_pending;
+
+ if (ering->tx_pending >= 32
+ && ering->tx_pending <= ering->tx_max_pending)
+ xsvnicp->tx_ring_size = ering->tx_pending;
+
+ set_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state);
+ xsvnic_set_oper_down(xsvnicp, 1);
+ return 0;
+}
+*/
+/*
+static void xsvnic_get_ringparam(struct net_device *netdev,
+ struct ethtool_ringparam *ering)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+ ering->rx_max_pending = 2048;
+ ering->rx_mini_max_pending = 0;
+ ering->rx_jumbo_max_pending = 384;
+ ering->rx_pending = xsvnicp->data_conn.ctx.rx_ring_size;
+ ering->rx_mini_pending = 0;
+ ering->rx_jumbo_pending = xsvnicp->data_conn.ctx.rx_ring_size;
+ ering->tx_max_pending = 2048;
+ ering->tx_pending = xsvnicp->data_conn.ctx.tx_ring_size;
+}
+*/
+/*
+static void xsvnic_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strncpy(drvinfo->driver, "xsvnic", 32);
+ strncpy(drvinfo->version, XSVNIC_DRIVER_VERSION, 32);
+ strncpy(drvinfo->fw_version, "N/A", 32);
+ strncpy(drvinfo->bus_info, "N/A", 32);
+}
+*/
+
+u32 xsvnic_op_get_rx_csum(struct net_device *dev)
+{
+ return (dev->features & NETIF_F_IP_CSUM) != 0;
+}
+
+int xsvnic_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+{
+ struct xsvnic *xsvnicp = netdev_priv(dev);
+
+ if (xsvnic_tx_intr_mode) {
+ coal->tx_coalesce_usecs =
+ xsvnicp->data_conn.ctx.tx_coalesce_usecs;
+ coal->tx_max_coalesced_frames =
+ xsvnicp->data_conn.ctx.tx_max_coalesced_frames;
+ }
+
+ if (xsvnic_rx_intr_mode) {
+ coal->rx_coalesce_usecs =
+ xsvnicp->data_conn.ctx.rx_coalesce_usecs;
+ coal->rx_max_coalesced_frames =
+ xsvnicp->data_conn.ctx.rx_max_coalesced_frames;
+ }
+
+ return 0;
+}
+
+int xsvnic_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+{
+ struct xsvnic *xsvnicp = netdev_priv(dev);
+ u32 tx_usecs, tx_frames;
+ u32 rx_usecs, rx_frames;
+ int ret;
+ struct xscore_conn_ctx *ctx;
+
+ if (coal->rx_coalesce_usecs > 0xffff ||
+ coal->rx_max_coalesced_frames > 0xffff)
+ return -EINVAL;
+
+ ctx = &xsvnicp->data_conn.ctx;
+
+ tx_usecs = ctx->tx_coalesce_usecs;
+ tx_frames = ctx->tx_max_coalesced_frames;
+ rx_usecs = ctx->rx_coalesce_usecs;
+ rx_frames = ctx->rx_max_coalesced_frames;
+
+ /* Modify TX cq */
+ if (xsvnic_tx_intr_mode && ((tx_usecs != coal->tx_coalesce_usecs) ||
+ (tx_frames !=
+ coal->tx_max_coalesced_frames))) {
+ ret = xscore_modify_cq(ctx->scq, coal->tx_max_coalesced_frames,
+ coal->tx_coalesce_usecs);
+ if (ret && ret != -ENOSYS) {
+ pr_info("failed modifying Send CQ (%d) vnic ", ret);
+ pr_info("%s\n", xsvnicp->vnic_name);
+ return ret;
+ }
+
+ ctx->tx_coalesce_usecs = coal->tx_coalesce_usecs;
+ ctx->tx_max_coalesced_frames = coal->tx_max_coalesced_frames;
+ }
+
+ /* Modify RX cq */
+ if (xsvnic_rx_intr_mode && ((rx_usecs != coal->rx_coalesce_usecs) ||
+ (rx_frames !=
+ coal->rx_max_coalesced_frames))) {
+ ret = xscore_modify_cq(ctx->rcq, coal->rx_max_coalesced_frames,
+ coal->rx_coalesce_usecs);
+ if (ret && ret != -ENOSYS) {
+ pr_err("failed modifying Recv CQ (%d) vnic ", ret);
+ pr_err("%s\n", xsvnicp->vnic_name);
+ return ret;
+ }
+ ctx->rx_coalesce_usecs = coal->rx_coalesce_usecs;
+ ctx->rx_max_coalesced_frames = coal->rx_max_coalesced_frames;
+ }
+
+ return 0;
+}
+
+/*
+static struct ethtool_ops xsvnic_ethtool_ops = {
+ .get_settings = xsvnic_get_settings,
+ .get_drvinfo = xsvnic_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ringparam = xsvnic_get_ringparam,
+ .set_ringparam = xsvnic_set_ringparam,
+ .set_coalesce = xsvnic_set_coalesce,
+};
+*/
+
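+/*
+ * Minimal MII emulation for SIOCGMIIPHY/SIOCGMIIREG so that mii-tool and
+ * similar monitors see a fixed PHY (id 5) reporting 100 Mb full duplex,
+ * with the link-status bit tracking netif_carrier_ok().
+ */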
+static int xsvnic_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ struct mii_ioctl_data *data = if_mii(ifr);
+ int ret = 0;
+ struct xsvnic *xsvnicp;
+
+ if (!netif_running(netdev))
+ return -EAGAIN;
+
+ xsvnicp = netdev_priv(netdev);
+ xsvnicp->counters[XSVNIC_IOCTL_COUNTER]++;
+
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ data->phy_id = 5;
+ break;
+ case SIOCGMIIREG:
+ /*
+ * Mainly used by mii monitor
+ */
+ switch (data->reg_num) {
+ case 0:
+ data->val_out = 0x2100;
+ break;
+ case 1:
+ data->val_out = 0xfe00 |
+ (netif_carrier_ok(netdev) << 2);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Needs to be called with the spin lock held
+ */
+static void handle_qp_error(struct xsvnic *xsvnicp, int qp_error)
+{
+ pr_info("XSVNIC %s: Link Down ", xsvnicp->vnic_name);
+ pr_info("(QP error %d)\n", qp_error);
+ xsvnicp->counters[XSVNIC_QP_ERROR_COUNTER]++;
+ xsvnic_set_oper_down(xsvnicp, 0);
+}
+
+static void xsvnic_reclaim_tx_buffers(struct xsvnic *xsvnicp)
+{
+ struct xscore_buf_info binfo;
+ int qp_error = 0;
+ /*
+ * Now reap completions
+ */
+ while (xscore_poll_send(&xsvnicp->data_conn.ctx, &binfo) > 0) {
+ CALC_MAX_MIN_TXTIME(xsvnicp, binfo.time_stamp);
+ xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER_REAP]++;
+ xsvnic_dev_kfree_skb_any(binfo.cookie);
+ if (binfo.status) {
+ IOCTRL_INFO("VNIC: %s Data Send Completion error: %d\n",
+ xsvnicp->vnic_name, binfo.status);
+ qp_error = binfo.status;
+ }
+ }
+ if (qp_error)
+ handle_qp_error(xsvnicp, qp_error);
+}
+
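+/*
+ * Transmit path: drop packets while acting as HA standby, report
+ * NETDEV_TX_BUSY while operationally down, pad runts to the minimum
+ * length and, when TSO is enabled, prepend the TSO/VLAN pre-header
+ * before posting the skb to the data QP with xscore_post_send_sg().
+ * TX completions are reclaimed periodically when send-completion
+ * interrupts are disabled.
+ */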
+int xsvnic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ int ret = NETDEV_TX_OK;
+ int slen = skb->len;
+ unsigned long flags;
+ u8 skb_need_tofree = 0;
+
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+
+ /* Stop sending packet if standby interface */
+ if (xsvnicp->mp_flag
+ && unlikely(test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+ dev_kfree_skb_any(skb);
+ xsvnicp->counters[XSVNIC_TX_DROP_STANDBY_COUNTER]++;
+ goto out;
+ }
+
+ if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+ ret = NETDEV_TX_BUSY;
+ xsvnicp->stats.tx_dropped++;
+ xsvnicp->counters[XSVNIC_TX_DROP_OPER_DOWN_COUNT]++;
+ goto out;
+ }
+
+ if (skb->len < XSVNIC_MIN_PACKET_LEN) {
+ xsvnicp->counters[XSVNIC_SHORT_PKT_COUNTER]++;
+ if (skb_padto(skb, XSVNIC_MIN_PACKET_LEN)) {
+ ret = 0;
+ xsvnicp->stats.tx_dropped++;
+ xsvnicp->counters[XSVNIC_TX_SKB_ALLOC_ERROR_COUNTER]++;
+ goto reclaim;
+ }
+ skb->len = XSVNIC_MIN_PACKET_LEN;
+
+ }
+ CALC_MAX_PKT_TX(xsvnicp, skb->len);
+ /*
+ * Check if it is a gso packet
+ */
+ if (xsvnicp->is_tso) {
+ int mss, hroom;
+ int doff = 0;
+ struct xs_tsovlan_header *hdrp;
+ u16 vid = 0;
+
+ if (skb_vlan_tag_present(skb)) {
+ hroom = sizeof(struct xs_tsovlan_header);
+ vid = skb_vlan_tag_get(skb);
+ xsvnicp->counters[XSVNIC_TX_VLAN_COUNTER]++;
+ } else {
+ hroom = sizeof(struct xs_tso_header);
+ }
+ if (unlikely(skb_headroom(skb) < hroom)) {
+ if (skb_cow(skb, hroom) < 0) {
+ xsvnicp->stats.tx_dropped++;
+ xsvnicp->counters[XSVNIC_TX_EXPANDSKB_ERROR]++;
+ skb_need_tofree = 1;
+ goto free_skb;
+ }
+ xsvnicp->counters[XSVNIC_TX_SKB_NOHEAD_COUNTER]++;
+ }
+
+ mss = skb_is_gso(skb);
+ if (mss) {
+ if (skb_header_cloned(skb)) {
+ xsvnicp->counters
+ [XSVNIC_TX_EXPAND_HEAD_COUNTER]++;
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ if (ret) {
+ xsvnicp->counters
+ [XSVNIC_TX_EXPAND_HEAD_ECNTR]++;
+ skb_need_tofree = 1;
+ goto free_skb;
+ }
+ }
+ hdrp = (struct xs_tsovlan_header *)skb_push(skb, hroom);
+ /*
+ * Now add the MSS and data offset into the 4-byte TSO pre-header
+ * (bits 31:30 = flags, bits 29:16 = data offset, bits 15:0 = MSS)
+ */
+ doff =
+ skb_transport_offset(skb) + tcp_hdrlen(skb) - hroom;
+ xsvnicp->counters[XSVNIC_TX_SKB_TSO_COUNTER]++;
+ hdrp->tso_info =
+ cpu_to_be32((1 << 30) | (doff << 16) | mss);
+ } else {
+ hdrp = (struct xs_tsovlan_header *)skb_push(skb, hroom);
+ hdrp->tso_info = cpu_to_be32((1 << 30) | (1 << 28));
+ }
+
+ if (vid) {
+ hdrp->vlan_info = cpu_to_be32(vid);
+ hdrp->tso_info =
+ be32_to_cpu(hdrp->tso_info) | (3 << 30);
+ }
+ }
+ /*
+ * Spin lock has to be released for soft-HCA to work correctly
+ */
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ ret = xscore_post_send_sg(&xsvnicp->data_conn.ctx, skb, 0);
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (ret) {
+ if (ret == -ENOBUFS) {
+ xsvnicp->stats.tx_dropped++;
+ xsvnicp->counters[XSVNIC_TX_RING_FULL_COUNTER]++;
+ } else {
+ handle_qp_error(xsvnicp, ret);
+ }
+ ret = NETDEV_TX_OK;
+ skb_need_tofree = 1;
+ goto free_skb;
+ }
+ netdev->trans_start = jiffies;
+ xsvnicp->send_hbeat_flag = 0;
+ xsvnicp->stats.tx_packets++;
+ xsvnicp->stats.tx_bytes += slen;
+ xsvnicp->counters[XSVNIC_TX_COUNTER]++;
+
+free_skb:
+ if (skb_need_tofree)
+ dev_kfree_skb(skb);
+
+ if (!xsvnic_tx_intr_mode
+ && (xsvnicp->reclaim_count++ > xsvnic_reclaim_count)) {
+reclaim:
+ xsvnicp->reclaim_count = 0;
+ xsvnic_reclaim_tx_buffers(xsvnicp);
+ }
+out:
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+
+ return ret;
+}
+
+static inline void xsvnic_untag_vlan(struct xsvnic *xsvnicp,
+ struct sk_buff *skb, u16 *vlan_tci)
+{
+ struct ethhdr *eh = (struct ethhdr *)(skb->data);
+
+ if (eh->h_proto == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+ /*
+ * Grab VLAN information and TCI fields and populate SKB
+ * Strip the vlan tag
+ */
+ *vlan_tci = be16_to_cpu(veth->h_vlan_TCI);
+ memmove((u8 *) eh + VLAN_HLEN, eh, ETH_ALEN * 2);
+ skb_pull(skb, VLAN_HLEN);
+ }
+}
+
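+/*
+ * Each received frame ends with a 4-byte big-endian trailer carrying
+ * checksum-offload flags (XSIGO_IPV4_BIT, the TCP/UDP checksum-good
+ * bits and the IP-fragment bit).  Translate those flags into
+ * skb->ip_summed and strip the trailer with skb_trim() before the skb
+ * is handed up the stack.
+ */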
+static inline void xsvnic_verify_checksum(struct xsvnic *xsvnicp,
+ struct sk_buff *skb, int sz)
+{
+ u32 trailer;
+
+ if (xsvnic_force_csum_offload) {
+ xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_COUNTER]++;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb_trim(skb, sz - sizeof(int));
+ return;
+ } else
+ skb->ip_summed = CHECKSUM_NONE;
+
+ trailer = be32_to_cpu(*(u32 *) ((u8 *) skb->data + sz - 4));
+
+ skb_trim(skb, sz - sizeof(int));
+
+ if (!(trailer & XSIGO_IPV4_BIT)) {
+ xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_NONIPV4_COUNTER]++;
+ return;
+ }
+
+ if (trailer & (XSIGO_TCP_CHKSUM_GOOD_BIT | XSIGO_UDP_CHKSUM_GOOD_BIT)) {
+ xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_COUNTER]++;
+ if (trailer & XSIGO_IP_FRAGMENT_BIT) {
+ skb->csum = (trailer >> 16);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_FRAG_COUNTER]++;
+ } else
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+}
+
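+/*
+ * Parse one RX-batch segment header: a 32-bit big-endian word holding
+ * the segment length and a "final segment" flag.  Returns a pointer to
+ * the segment payload and fills in the length and last-packet marker
+ * for the caller.
+ */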
+char *xsvnic_get_rxbat_pkts(struct xsvnic *xsvnicp, int *curr_seg_len,
+ char *start, char *is_last_pkt, int total_pkt_len)
+{
+ int rxbat_hdr = be32_to_cpu(*(u32 *) start);
+ *curr_seg_len = RXBAT_FRAG_LEN(rxbat_hdr);
+ *is_last_pkt = (RXBAT_FINAL_BIT(rxbat_hdr) ? 1 : 0);
+ return start + XS_RXBAT_HDRLEN;
+}
+
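+/*
+ * Deliver one received packet to the stack: verify/strip the checksum
+ * trailer when RX checksum offload is active, optionally untag the
+ * VLAN header, drop the packet if this vnic is the HA standby, and
+ * finally hand it to LRO or netif_receive_skb().
+ */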
+void xsvnic_send_skb(struct xsvnic *xsvnicp, struct sk_buff *skb,
+ int curr_pkt_len, char chksum_offload)
+{
+ struct net_device *netdev = xsvnicp->netdev;
+ u16 vlan_tci = 0xFFFF;
+
+ skb->dev = netdev;
+ if ((netdev->features & NETIF_F_IP_CSUM) && chksum_offload)
+ xsvnic_verify_checksum(xsvnicp, skb, curr_pkt_len);
+ else
+ skb->ip_summed = CHECKSUM_NONE;
+ /*
+ * Software based VLAN acceleration enabled, so process it
+ */
+ if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+ xsvnic_untag_vlan(xsvnicp, skb, &vlan_tci);
+
+ skb->protocol = eth_type_trans(skb, netdev);
+ xsvnicp->stats.rx_packets++;
+ xsvnicp->stats.rx_bytes += curr_pkt_len;
+ CALC_MAX_PKT_RX(xsvnicp, skb->len);
+ /* Enable dumping packets on Demand */
+ XSIGO_DUMP_PKT(skb->data, skb->len, "xsvnic_process_rx_skb");
+ /*
+ * Check if it is HA and standby and drop the packet
+ */
+ if (xsvnicp->mp_flag
+ && unlikely(test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+ dev_kfree_skb_any(skb);
+ xsvnicp->counters[XSVNIC_RX_DROP_STANDBY_COUNTER]++;
+ } else {
+ if (xsvnic_vlanaccel && (vlan_tci != 0xFFFF)) {
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+ vlan_tci);
+ xsvnicp->counters[XSVNIC_RX_SENDTO_VLANGRP]++;
+ }
+
+ if (netdev->features & NETIF_F_LRO)
+ lro_receive_skb(&xsvnicp->lro.lro_mgr, skb, NULL);
+ else
+ netif_receive_skb(skb);
+ }
+ netdev->last_rx = jiffies;
+}
+
+void xsvnic_count_segs(struct xsvnic *xsvnicp, char nr_segs, int pkt_len)
+{
+ if (nr_segs > 1) {
+ xsvnicp->counters[XSVNIC_RXBAT_PKTS]++;
+ if (nr_segs <= 5)
+ xsvnicp->counters[XSVNIC_RXBAT_BELOW_5SEGS]++;
+ else if (nr_segs > 5 && nr_segs <= 10)
+ xsvnicp->counters[XSVNIC_RXBAT_BTW_5_10SEGS]++;
+ else if (nr_segs > 10 && nr_segs <= 20)
+ xsvnicp->counters[XSVNIC_RXBAT_BTW_10_20SEGS]++;
+ else
+ xsvnicp->counters[XSVNIC_RXBAT_ABOVE_20SEGS]++;
+
+ if (nr_segs > xsvnicp->counters[XSVNIC_RX_MAXBATED_COUNTER])
+ xsvnicp->counters[XSVNIC_RX_MAXBATED_COUNTER] = nr_segs;
+ }
+ if (pkt_len > PAGE_SIZE)
+ xsvnicp->counters[XSVNIC_8KBAT_PKTS]++;
+}
+
+int xsvnic_align_addr(char **start)
+{
+ int align_diff;
+ char *align_addr = (char *)((unsigned long)(*start + 3) & ~0x3);
+
+ align_diff = align_addr - *start;
+ *start = align_addr;
+ return align_diff;
+}
+
+void xsvnic_process_rx_skb(struct xsvnic *xsvnicp,
+ struct xscore_buf_info *binfo)
+{
+ struct sk_buff *skb;
+ int tot_pkt_len;
+
+ tot_pkt_len = binfo->sz;
+ skb = binfo->cookie;
+ skb_put(skb, tot_pkt_len);
+ xsvnic_send_skb(xsvnicp, skb, tot_pkt_len, 1);
+}
+
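+/*
+ * NAPI poll: drain up to 'budget' receive completions from the data
+ * connection, dispatching each buffer through the batched or per-skb
+ * receive path, then refill the RX ring.  When the budget is not
+ * exhausted, complete NAPI and re-enable the RX interrupt; if new
+ * completions raced with the re-enable, reschedule and poll again.
+ */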
+int xsvnic_poll(struct napi_struct *napi, int budget)
+{
+ struct xsvnic *xsvnicp = container_of(napi, struct xsvnic, napi);
+ struct xscore_conn_ctx *ctx = &xsvnicp->data_conn.ctx;
+ struct xscore_buf_info binfo;
+ int ret, done = 0, qp_error = 0;
+ unsigned long flags;
+
+ /*
+ * If not connected complete it
+ */
+ xsvnicp->counters[XSVNIC_NAPI_POLL_COUNTER]++;
+ if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+ napi_complete(&xsvnicp->napi);
+ clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+ return 0;
+ }
+again:
+ while (done < budget) {
+ ret = xscore_read_buf(ctx, &binfo);
+ if (ret != 1 || binfo.status) {
+ if (binfo.status) {
+ qp_error = 1;
+ handle_qp_error(xsvnicp, binfo.status);
+ xsvnic_process_rbuf_error(xsvnicp, &binfo);
+ }
+ break;
+ }
+
+ if (xsvnicp->is_rxbatching)
+ xsvnic_process_pages(xsvnicp, &binfo);
+ else
+ xsvnic_process_rx_skb(xsvnicp, &binfo);
+
+ xsvnicp->counters[XSVNIC_RX_SKB_COUNTER]++;
+ done++;
+ }
+
+ napi_update_budget(&xsvnicp->napi, done);
+
+ if (!qp_error && !test_bit(XSVNIC_RX_NOBUF, &xsvnicp->state)) {
+ if (xscore_refill_recv(&xsvnicp->data_conn.ctx, GFP_ATOMIC)) {
+ xsvnicp->counters[XSVNIC_RX_NOBUF_COUNTER]++;
+ set_bit(XSVNIC_RX_NOBUF, &xsvnicp->state);
+ }
+ }
+ if (done < budget) {
+ if (xsvnicp->netdev->features & NETIF_F_LRO)
+ lro_flush_all(&xsvnicp->lro.lro_mgr);
+ napi_complete(&xsvnicp->napi);
+ clear_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+ } else {
+ set_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+ xsvnicp->counters[XSVNIC_RX_QUOTA_EXCEEDED_COUNTER]++;
+ return done;
+ }
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) &&
+ test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) &&
+ test_bit(XSVNIC_OPER_UP, &xsvnicp->state) &&
+ !test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+ set_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+ if (xscore_enable_rxintr(ctx)) {
+ if (napi_reschedule(&xsvnicp->napi)) {
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ goto again;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ return done;
+}
+
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *xsvnicp)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+
+ if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ return -1;
+
+ /* Check for non-TCP packet */
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ /* check if IP header and TCP header are complete */
+ if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+ return -1;
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+
+ return 0;
+}
+
+static void xsvnic_lro_setup(struct xsvnic *xsvnicp)
+{
+ xsvnicp->lro.lro_mgr.max_aggr = lro_max_aggr;
+ xsvnicp->lro.lro_mgr.max_desc = XSVNIC_MAX_LRO_DESCRIPTORS;
+ xsvnicp->lro.lro_mgr.lro_arr = xsvnicp->lro.lro_desc;
+ xsvnicp->lro.lro_mgr.get_skb_header = get_skb_hdr;
+ xsvnicp->lro.lro_mgr.features = LRO_F_NAPI;
+ xsvnicp->lro.lro_mgr.dev = xsvnicp->netdev;
+ xsvnicp->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
+static struct net_device_ops xsvnic_netdev_ops = {
+ .ndo_open = xsvnic_open,
+ .ndo_stop = xsvnic_stop,
+ .ndo_start_xmit = xsvnic_start_xmit,
+ .ndo_get_stats = xsvnic_get_stats,
+ .ndo_set_rx_mode = xsvnic_set_multicast,
+ .ndo_change_mtu = xsvnic_change_mtu,
+ .ndo_set_mac_address = xsvnic_set_mac_address,
+ .ndo_do_ioctl = xsvnic_ioctl,
+ .ndo_tx_timeout = xsvnic_tx_timeout,
+ .ndo_vlan_rx_add_vid = xsvnic_vlan_rx_add_vlanid,
+ .ndo_vlan_rx_kill_vid = xsvnic_vlan_rx_kill_vlanid
+};
+
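+/*
+ * Final netdev setup: program watchdog and queue-length defaults, select
+ * offload features (SG, GSO, GRO, optional HIGHDMA, VLAN RX offload,
+ * checksum offload and LRO) from module parameters and the install
+ * flags, derive the MAC address from the XSMP-provided value, attach
+ * NAPI and register the device.
+ */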
+static int setup_netdev_info(struct net_device *netdev)
+{
+ struct xsvnic *xsvnicp = netdev_priv(netdev);
+ struct ib_device *hca = xsvnicp->xsmp_info.ib_device;
+ u64 mac;
+
+ netdev->watchdog_timeo = 10 * HZ;
+ netdev->tx_queue_len = xsvnic_tx_queue_len;
+ netdev->features |=
+ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GRO;
+ if (xsvnic_highdma)
+ netdev->features |= NETIF_F_HIGHDMA;
+ if (xsvnic_vlanaccel) {
+ pr_info("XSVNIC:%s Enabling vlan offloading ", __func__);
+ pr_info("[xsvnic %s]\n", xsvnicp->vnic_name);
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+ }
+ if (lro)
+ xsvnicp->lro_mode = 1;
+ /*
+ * Based on the install_flag setting, set up the TSO flag.
+ * Checksum & SG must be enabled by default, also in the TSO case
+ * (NETIF_F_HW_VLAN_TX | NETIF_F_TSO).
+ */
+ if (xsvnicp->install_flag & (XSVNIC_INSTALL_TCP_OFFL |
+ XSVNIC_INSTALL_UDP_OFFL)
+ || xsvnic_force_csum_offload)
+ netdev->features |= NETIF_F_IP_CSUM;
+
+ if (xsvnicp->lro_mode) {
+ xsvnic_lro_setup(xsvnicp);
+ netdev->features |= NETIF_F_LRO;
+ }
+ xg_setup_pseudo_device(netdev, hca);
+
+ SET_NETDEV_OPS(netdev, &xsvnic_netdev_ops);
+ mac = be64_to_cpu(xsvnicp->mac);
+ memcpy(netdev->dev_addr, (u8 *) (&mac) + 2, ETH_ALEN);
+ netif_napi_add(netdev, &xsvnicp->napi, xsvnic_poll, napi_weight);
+ if (xsvnic_esx_preregister_setup(netdev))
+ return -EINVAL;
+ return register_netdev(netdev);
+}
+
+struct xsvnic *xsvnic_get_xsvnic_by_vid(u64 resource_id)
+{
+ struct xsvnic *xsvnicp;
+
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsvnicp->resource_id == resource_id)
+ return xsvnicp;
+ }
+ return NULL;
+}
+
+struct xsvnic *xsvnic_get_xsvnic_by_name(char *vnic_name)
+{
+ struct xsvnic *xsvnicp;
+
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (strcmp(xsvnicp->vnic_name, vnic_name) == 0)
+ return xsvnicp;
+ }
+ return NULL;
+}
+
+/*
+ * Handle install message
+ */
+
+static int xsvnic_xsmp_install(xsmp_cookie_t xsmp_hndl,
+ struct xsvnic_xsmp_msg *xmsgp, void *data,
+ int len)
+{
+ struct net_device *netdev;
+ struct xsvnic *xsvnicp;
+ u16 mp_flag;
+ char vnic_name[128];
+ int ret = 0;
+ u64 m;
+ u8 update_state = 0;
+ u8 ecode = 0;
+ u8 is_ha = 0;
+
+ XSMP_FUNCTION("%s:\n", __func__);
+
+ xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (xsvnicp) {
+ /*
+ * Duplicate VID, send ACK, send oper state update
+ */
+ XSMP_ERROR("%s: Duplicate VNIC install message name", __func__);
+ XSMP_ERROR(",: %s, ", xmsgp->vnic_name);
+ XSMP_ERROR("VID=0x%llx\n", be64_to_cpu(xmsgp->resource_id));
+ ret = -EEXIST;
+ clear_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state);
+ update_state = 1;
+ xsvnicp->xsmp_hndl = xsmp_hndl;
+ xsvnic_update_tca_info(xsvnicp, xmsgp, 0);
+ goto send_ack;
+ }
+
+ XSMP_INFO("Installing VNIC : %s, VID=0x%llx\n",
+ xmsgp->vnic_name, be64_to_cpu(xmsgp->resource_id));
+
+ mp_flag = be16_to_cpu(xmsgp->mp_flag);
+ /*
+ * Append .P and .S to vnics
+ */
+ strncpy(vnic_name, xmsgp->vnic_name, sizeof(vnic_name) - 1);
+ if (mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+ if (xsvnic_havnic) {
+ char *pos;
+
+ strcpy(vnic_name, xmsgp->mp_group);
+
+ pos = strchr(vnic_name, '.');
+ if (pos != 0)
+ *pos = 0;
+ is_ha = 1;
+ strncpy(xmsgp->vnic_name, vnic_name,
+ sizeof(xmsgp->vnic_name) - 1);
+ if (mp_flag & MP_XSVNIC_PRIMARY)
+ strcat(vnic_name, "_P");
+ else
+ strcat(vnic_name, "_S");
+ } else {
+ pr_warn("XSVNIC: %s HA vnic not ", xmsgp->vnic_name);
+ pr_warn("supported\n");
+ ret = -EINVAL;
+ ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+ goto dup_error;
+ }
+ }
+
+ if (xcpm_check_duplicate_names(xsmp_hndl, vnic_name,
+ XSMP_MESSAGE_TYPE_XVE) != 0) {
+ pr_info("%s Duplicate name %s\n", __func__, vnic_name);
+ ret = -EEXIST;
+ ecode = XSVNIC_NACK_DUP_NAME;
+ goto dup_error;
+ }
+
+ xsvnicp = xsvnic_get_xsvnic_by_name(vnic_name);
+ if (xsvnicp) {
+ XSMP_ERROR("%s: Duplicate name: %s, VID=0x%llx\n",
+ __func__, xmsgp->vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ret = -EEXIST;
+ ecode = XSVNIC_NACK_DUP_NAME;
+ goto dup_error;
+ }
+ /*
+ * Check for the long name vnic
+ */
+ if (strlen(vnic_name) > XSVNIC_VNIC_NAMELENTH) {
+ pr_err("XSVNIC: vnic_name %s,", xmsgp->vnic_name);
+ pr_err("length > 15 not supported\n");
+ ret = -EINVAL;
+ ecode = XSVNIC_NACK_INVALID;
+ goto dup_error;
+ }
+
+ netdev = alloc_netdev(sizeof(*xsvnicp), vnic_name, NET_NAME_UNKNOWN,
+ &xsvnic_setup);
+ if (netdev == NULL) {
+ XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ret = -ENOMEM;
+ ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+ goto dup_error;
+ }
+ xsvnicp = netdev_priv(netdev);
+ memset(xsvnicp, 0, sizeof(*xsvnicp));
+ xsvnicp->netdev = netdev;
+ INIT_LIST_HEAD(&xsvnicp->vlan_list);
+ INIT_LIST_HEAD(&xsvnicp->xsvnic_list);
+ init_completion(&xsvnicp->done);
+ mutex_init(&xsvnicp->mutex);
+ spin_lock_init(&xsvnicp->lock);
+ xsvnicp->resource_id = be64_to_cpu(xmsgp->resource_id);
+ xsvnicp->bandwidth = be16_to_cpu(xmsgp->vn_admin_rate);
+ m = xmsgp->mac_high;
+ xsvnicp->mac = m << 32 | xmsgp->mac_low;
+ memcpy(xsvnicp->vnic_name, vnic_name, XSVNIC_MAX_NAME_SIZE - 1);
+ xsvnicp->vnic_name[XSVNIC_MAX_NAME_SIZE - 1] = 0;
+ memcpy(xsvnicp->mp_group, xmsgp->mp_group, XSVNIC_MAX_NAME_SIZE - 1);
+ xsvnicp->mp_group[XSVNIC_MAX_NAME_SIZE - 1] = 0;
+ xsvnicp->sl = be16_to_cpu(xmsgp->service_level);
+ xsvnicp->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+ xsvnicp->install_flag = be32_to_cpu(xmsgp->install_flag);
+ xsvnicp->mtu = be16_to_cpu(xmsgp->vn_mtu);
+ xsvnicp->tca_guid = be64_to_cpu(xmsgp->tca_guid);
+ xsvnicp->tca_lid = be16_to_cpu(xmsgp->tca_lid);
+ xsvnicp->xsmp_hndl = xsmp_hndl;
+ xcpm_get_xsmp_session_info(xsmp_hndl, &xsvnicp->xsmp_info);
+
+ /*
+ * In case of Non-HA set state to ACTIVE
+ */
+ if (!is_ha)
+ xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+ /*
+ * If the MTU is jumbo or if it is LLE, use the default of 256
+ */
+ if (xsvnicp->mtu > 1518 || !xsvnicp->xsmp_info.is_shca) {
+ xsvnicp->rx_ring_size = xsvnic_rxring_size;
+ xsvnicp->tx_ring_size = xsvnic_txring_size;
+ } else {
+ xsvnicp->rx_ring_size = xsvnic_rxring_size;
+ xsvnicp->tx_ring_size = xsvnic_txring_size;
+ }
+
+ netdev->mtu = xsvnicp->mtu;
+ /*
+ * Always set chassis ADMIN up by default
+ */
+ set_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+ set_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP, &xsvnicp->state);
+
+ INIT_DELAYED_WORK(&xsvnicp->sm_work, xsvnic_state_machine_work);
+
+ if (xsvnicp->install_flag & XSVNIC_INSTALL_TSO) {
+ xsvnicp->is_tso = 1;
+ /* BUG 22267 */
+ /* xsvnicp->lro_mode = 1; */
+ /*
+ * Add an additional 8 bytes of header room for the TSO pre-header
+ */
+ netdev->hard_header_len += 8;
+ netdev->features |= NETIF_F_TSO;
+ }
+
+ if (xsvnic_conn_init(xsvnicp)) {
+ XSMP_ERROR("%s: xsvnic_conn_init error name: %s, VID=0x%llx\n",
+ __func__, vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+ goto proc_error;
+ }
+
+ ret = xsvnic_add_proc_entry(xsvnicp);
+ if (ret) {
+ XSMP_ERROR("%s: procfs error name: %s, VID=0x%llx\n",
+ __func__, vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ goto proc_error;
+ }
+
+ ret = setup_netdev_info(netdev);
+ if (ret) {
+ XSMP_ERROR("%s: setup_netdev_info error name: ,", __func__);
+ XSMP_ERROR("%s VID=0x%llx ret %x\n",
+ vnic_name, be64_to_cpu(xmsgp->resource_id), ret);
+ ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+ goto setup_netdev_info_error;
+ }
+
+ netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
+
+ if (xsvnic_esx_postregister_setup(netdev)) {
+ ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+ goto post_reg_err;
+ }
+ /*
+ * Add it to the list, mutex held for all XSMP processing
+ */
+ list_add_tail(&xsvnicp->xsvnic_list, &xsvnic_list);
+ pr_info("Installed XSVNIC vnic %s, ", vnic_name);
+ pr_info("VID=0x%llx, tca_guid: 0x%llx, tca lid: 0x%x tso %d\n",
+ xsvnicp->resource_id, xsvnicp->tca_guid,
+ xsvnicp->tca_lid, xsvnicp->is_tso);
+ /*
+ * Send ADMIN down and OPER down
+ */
+ xsvnic_send_msg_to_xsigod(xsmp_hndl, data, len);
+ atomic_inc(&xsvnicp->ref_cnt);
+ xsvnicp->sm_delay = 1000;
+ queue_sm_work(xsvnicp, 0);
+ /*
+ * Send ACK
+ */
+send_ack:
+ ret = xsvnic_xsmp_send_ack(xsmp_hndl, xmsgp);
+ if (ret) {
+ XSMP_ERROR
+ ("%s: xsvnic_xsmp_send_ack error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
+ if (update_state)
+ xsvnic_update_oper_state(xsvnicp);
+
+ return 0;
+
+post_reg_err:
+ unregister_netdev(netdev);
+setup_netdev_info_error:
+ xsvnic_remove_proc_entry(xsvnicp);
+proc_error:
+ free_netdev(netdev);
+dup_error:
+ (void)xsvnic_xsmp_send_nack(xsmp_hndl, xmsgp, sizeof(*xmsgp), ecode);
+ return ret;
+}
+
+static int xsvnic_remove_vnic(struct xsvnic *xsvnicp)
+{
+ struct vlan_entry *vlan, *tvlan;
+
+ mutex_lock(&xsvnicp->mutex);
+ xsvnic_io_disconnect(xsvnicp);
+ mutex_unlock(&xsvnicp->mutex);
+
+ xsvnic_put_ctx(xsvnicp);
+ /*
+ * Wait for the reference count to go to zero
+ */
+ while (atomic_read(&xsvnicp->ref_cnt)) {
+ DRV_ERROR("%s: Waiting for refcnt to become zero %d\n",
+ __func__, atomic_read(&xsvnicp->ref_cnt));
+ msleep(100);
+ }
+ mutex_lock(&xsvnic_mutex);
+ list_del(&xsvnicp->xsvnic_list);
+ mutex_unlock(&xsvnic_mutex);
+ vmk_notify_uplink(xsvnicp->netdev);
+ unregister_netdev(xsvnicp->netdev);
+ pr_info("XSVNIC: %s deleted\n", xsvnicp->vnic_name);
+ xscore_conn_destroy(&xsvnicp->ctrl_conn.ctx);
+ xscore_conn_destroy(&xsvnicp->data_conn.ctx);
+ list_for_each_entry_safe(vlan, tvlan, &xsvnicp->vlan_list, vlan_list) {
+ list_del(&vlan->vlan_list);
+ kfree(vlan);
+ }
+
+ if (xsvnicp->mc_addrs != NULL)
+ kfree(xsvnicp->mc_addrs);
+
+ xsvnic_remove_proc_entry(xsvnicp);
+ if (!test_bit(XSVNIC_SHUTDOWN, &xsvnicp->state)) {
+ if (xsvnicp->mp_flag &
+ (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+ /*
+ * Punt the message to xsigod to handle
+ */
+ xsvnic_send_cmd_to_xsigod(xsvnicp, XSMP_XSVNIC_DELETE);
+ }
+ /*
+ * Ideally need to figure out why userspace ACK is not working
+ */
+ xsvnic_xsmp_send_notification(xsvnicp->xsmp_hndl,
+ xsvnicp->resource_id,
+ XSMP_XSVNIC_DELETE);
+ }
+ free_netdev(xsvnicp->netdev);
+ return 0;
+}
+
+static int handle_admin_state_change(struct xsvnic *xsvnicp,
+ struct xsvnic_xsmp_msg *xmsgp)
+{
+ if (xmsgp->admin_state) {
+ XSMP_INFO("%s: VNIC %s Admin state up message\n", __func__,
+ xsvnicp->vnic_name);
+ if (!test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state)) {
+ xsvnicp->counters[XSVNIC_ADMIN_UP_COUNTER]++;
+ set_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP,
+ &xsvnicp->state);
+ set_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+ }
+ } else { /* Admin Down */
+ XSMP_INFO("%s: VNIC %s Admin state down message\n",
+ __func__, xsvnicp->vnic_name);
+ if (test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state)) {
+ xsvnicp->counters[XSVNIC_ADMIN_DOWN_COUNTER]++;
+ clear_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+ clear_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP,
+ &xsvnicp->state);
+ set_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+ }
+ }
+ return 0;
+}
+
+static void xsvnic_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl,
+ u64 resource_id)
+{
+ struct xsvnic *xsvnicp;
+
+ xsvnicp = xsvnic_get_xsvnic_by_vid(resource_id);
+ if (!xsvnicp) {
+ XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+ __func__, resource_id);
+ return;
+ }
+ XSMP_INFO("VNIC: %s Oper Req from chassis\n", xsvnicp->vnic_name);
+ xsvnicp->counters[XSVNIC_OPER_REQ_COUNTER]++;
+ xsvnic_xsmp_send_oper_state(xsvnicp, resource_id,
+ test_bit(XSVNIC_OPER_UP, &xsvnicp->state)
+ ? XSMP_XSVNIC_OPER_UP :
+ XSMP_XSVNIC_OPER_DOWN);
+}
+
+static void xsvnic_update_tca_info(struct xsvnic *xsvnicp,
+ struct xsvnic_xsmp_msg *xmsgp,
+ int set_oper_down)
+{
+ /*
+ * Ignore invalid tca info
+ */
+ if (be64_to_cpu(xmsgp->tca_guid) == 0
+ || be16_to_cpu(xmsgp->tca_lid) == 0)
+ return;
+ if (xsvnicp->tca_guid != be64_to_cpu(xmsgp->tca_guid) ||
+ xsvnicp->tca_lid != be16_to_cpu(xmsgp->tca_lid)) {
+ xsvnicp->counters[XSVNIC_XT_LID_CHANGE_COUNTER]++;
+ pr_info("XSVNIC %s TCA id changed from", xsvnicp->vnic_name);
+ pr_info("(0x%Lx:0x%d) to (0x%Lx:0x%d)\n",
+ xsvnicp->tca_guid,
+ xsvnicp->tca_lid,
+ be64_to_cpu(xmsgp->tca_guid),
+ be16_to_cpu(xmsgp->tca_lid));
+ xsvnicp->tca_guid = be64_to_cpu(xmsgp->tca_guid);
+ xsvnicp->tca_lid = be16_to_cpu(xmsgp->tca_lid);
+ xsvnicp->ctrl_conn.ctx.dguid = xsvnicp->tca_guid;
+ xsvnicp->data_conn.ctx.dguid = xsvnicp->tca_guid;
+ xsvnicp->ctrl_conn.ctx.dlid = xsvnicp->tca_lid;
+ xsvnicp->data_conn.ctx.dlid = xsvnicp->tca_lid;
+ if (set_oper_down)
+ xsvnic_set_oper_down(xsvnicp, 1);
+ }
+}
+
+static int xsvnic_xsmp_update(xsmp_cookie_t xsmp_hndl,
+ struct xsvnic_xsmp_msg *xmsgp)
+{
+ u32 bitmask = be32_to_cpu(xmsgp->bitmask);
+ struct xsvnic *xsvnicp;
+ int ret = 0;
+ int send_ack = 1;
+
+ xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!xsvnicp) {
+ XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+ __func__, be64_to_cpu(xmsgp->resource_id));
+ return -EINVAL;
+ }
+
+ XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__,
+ xsvnicp->vnic_name, bitmask);
+
+ mutex_lock(&xsvnicp->mutex);
+
+ if (bitmask & XSVNIC_UPDATE_ADMIN_STATE) {
+ ret = handle_admin_state_change(xsvnicp, xmsgp);
+ /*
+ * Ack will be sent once QP's are brought down
+ */
+ send_ack = 0;
+ }
+
+ if (bitmask & XSVNIC_XT_STATE_DOWN) {
+ XSMP_INFO("%s: VNIC %s XT state down message\n",
+ __func__, xsvnicp->vnic_name);
+ xsvnicp->counters[XSVNIC_XT_DOWN_COUNTER]++;
+ set_bit(XSVNIC_XT_DOWN, &xsvnicp->state);
+ xsvnic_set_oper_down(xsvnicp, 1);
+ }
+
+ if (bitmask & XSVNIC_UPDATE_XT_CHANGE) {
+ XSMP_INFO("%s: VNIC %s XT state change message\n",
+ __func__, xsvnicp->vnic_name);
+ xsvnicp->counters[XSVNIC_XT_UPDATE_COUNTER]++;
+ xsvnic_update_tca_info(xsvnicp, xmsgp, 1);
+ clear_bit(XSVNIC_XT_DOWN, &xsvnicp->state);
+ }
+
+ if (send_ack && xsvnic_xsmp_send_ack(xsmp_hndl, xmsgp)) {
+ XSMP_ERROR
+ ("%s: xsvnic_xsmp_send_ack error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->vnic_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
+ mutex_unlock(&xsvnicp->mutex);
+
+ return ret;
+}
+
+/*
+ * Called with global mutex held to protect xsvnic_list
+ */
+static void xsvnic_xsmp_sync_begin(xsmp_cookie_t xsmp_hndl, void *msg)
+{
+ struct xsvnic *xsvnicp;
+
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl)) {
+ xsvnicp->xsmp_hndl = xsmp_hndl;
+ /*
+ * Do not handle SYNC_BEGIN here. Sometimes a bug
+ * on the IO director causes an unnecessary delete.
+ */
+#if 0
+ set_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state);
+#endif
+ }
+ }
+}
+
+static void xsvnic_update_oper_state(struct xsvnic *xsvnicp)
+{
+ if (xsvnicp->mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY))
+ xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+ xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+ test_bit(XSVNIC_OPER_UP, &xsvnicp->state) ?
+ XSMP_XSVNIC_OPER_UP :
+ XSMP_XSVNIC_OPER_DOWN);
+}
+
+/*
+ * Called with global mutex held to protect xsvnic_list
+ */
+static void xsvnic_xsmp_sync_end(xsmp_cookie_t xsmp_hndl)
+{
+ struct xsvnic *xsvnicp;
+ unsigned long flags;
+
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl)) {
+ if (test_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state)) {
+ pr_info("XSVNIC %s ", xsvnicp->vnic_name);
+ pr_info("deleted due to sync end condition\n");
+ xsvnic_counters[XSVNIC_SYNC_END_DEL_COUNTER]++;
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ set_bit(XSVNIC_DELETING, &xsvnicp->state);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ } else
+ xsvnic_update_oper_state(xsvnicp);
+ }
+ }
+}
+
+/*
+ * We set the DELETING bit and let sm_work thread handle delete
+ */
+static void xsvnic_handle_del_message(xsmp_cookie_t xsmp_hndl,
+ struct xsvnic_xsmp_msg *xmsgp)
+{
+ struct xsvnic *xsvnicp;
+ unsigned long flags;
+
+ xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!xsvnicp) {
+ xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+ return;
+ }
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ set_bit(XSVNIC_DELETING, &xsvnicp->state);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static void xsvnic_send_cmd_to_xsigod(struct xsvnic *xsvnicp, int cmd)
+{
+ struct xsmp_message_header *xhdr;
+ struct xsvnic_xsmp_msg *xmsgp;
+ int tlen = sizeof(*xmsgp) + sizeof(*xhdr);
+
+ xhdr = xcpm_alloc_msg(tlen);
+ if (!xhdr)
+ return;
+ memset(xhdr, 0, tlen);
+ xhdr->type = XSMP_MESSAGE_TYPE_VNIC;
+ xhdr->length = tlen;
+ xmsgp = (struct xsvnic_xsmp_msg *)(xhdr + 1);
+ xmsgp->type = cmd;
+ strcpy(xmsgp->vnic_name, xsvnicp->vnic_name);
+ xmsgp->resource_id = cpu_to_be64(xsvnicp->resource_id);
+ xmsgp->mp_flag = cpu_to_be16(xsvnicp->mp_flag);
+ xmsgp->code = 0;
+ xmsgp->length = cpu_to_be16(sizeof(*xmsgp));
+ if (xcpm_send_msg_xsigod(xsvnicp->xsmp_hndl, xhdr, tlen))
+ xcpm_free_msg(xhdr);
+}
+
+static void xsvnic_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+ int len)
+{
+ void *tmsg;
+
+ tmsg = xcpm_alloc_msg(len);
+ if (!tmsg)
+ return;
+ memcpy(tmsg, data, len);
+ if (xcpm_send_msg_xsigod(xsmp_hndl, tmsg, len))
+ xcpm_free_msg(tmsg);
+}
+
+static void xsvnic_handle_ip_req(xsmp_cookie_t xsmp_hndl, u8 *data, int len)
+{
+ struct xsvnic_xsmp_vlanip_msg *msgp =
+ (struct xsvnic_xsmp_vlanip_msg *)(data + sizeof(struct
+ xsmp_message_header));
+ struct xsvnic *xsvnicp;
+
+ XSMP_INFO("%s:XSMP message type VLAN IP\n", __func__);
+
+ xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(msgp->resource_id));
+ if (!xsvnicp) {
+ xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+ return;
+ }
+ strcpy(msgp->ifname, xsvnicp->vnic_name);
+ msgp->mp_flag = cpu_to_be16(xsvnicp->mp_flag);
+ /*
+ * Punt this message to userspace
+ */
+ xsvnic_send_msg_to_xsigod(xsmp_hndl, data, len);
+}
+
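+/*
+ * Cache the iSCSI boot parameters pushed by the chassis for this vNIC;
+ * they are exported later through /proc/driver/xsvnic/boot-info.
+ */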
+static void xsvnic_process_iscsi_info(xsmp_cookie_t xsmp_hndl, u8 *data,
+ int len)
+{
+ struct xsvnic_iscsi_msg *iscsi_msg = (struct xsvnic_iscsi_msg *)
+ (data + sizeof(struct xsmp_message_header));
+ struct xsvnic_iscsi_info *isp;
+ struct xsvnic *xsvnicp;
+
+ XSMP_INFO("%s:XSMP message type iscsi info\n", __func__);
+ xsvnicp =
+ xsvnic_get_xsvnic_by_vid(be64_to_cpu(iscsi_msg->iscsi_info.vid));
+ if (!xsvnicp) {
+ xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+ return;
+ }
+ /*
+ * Now copy over iSCSI information
+ */
+ isp = &xsvnicp->iscsi_boot_info;
+ isp->vid = be64_to_cpu(iscsi_msg->iscsi_info.vid);
+ isp->vlan_id = be16_to_cpu(iscsi_msg->iscsi_info.vlan_id);
+ isp->mac = be64_to_cpu(iscsi_msg->iscsi_info.mac);
+ isp->protocol = be16_to_cpu(iscsi_msg->iscsi_info.protocol);
+ isp->port = be16_to_cpu(iscsi_msg->iscsi_info.port);
+ isp->lun = be16_to_cpu(iscsi_msg->iscsi_info.lun);
+ isp->mount_type = be16_to_cpu(iscsi_msg->iscsi_info.mount_type);
+ isp->role = iscsi_msg->iscsi_info.role;
+ isp->ip_type = iscsi_msg->iscsi_info.ip_type;
+ isp->ip_addr = iscsi_msg->iscsi_info.ip_addr;
+ isp->netmask = iscsi_msg->iscsi_info.netmask;
+ isp->gateway_ip_address = iscsi_msg->iscsi_info.gateway_ip_address;
+ isp->dns_ip_address = iscsi_msg->iscsi_info.dns_ip_address;
+ isp->target_ip_address = iscsi_msg->iscsi_info.target_ip_address;
+ memcpy(isp->vnic_name, iscsi_msg->iscsi_info.vnic_name,
+ XSVNIC_MAX_NAME_SIZE);
+ memcpy(isp->domain_name, iscsi_msg->iscsi_info.domain_name,
+ MAX_DOMAIN_NAME_LEN);
+ memcpy(isp->target_iqn, iscsi_msg->iscsi_info.target_iqn,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->target_portal_group,
+ iscsi_msg->iscsi_info.target_portal_group,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->initiator_iqn, iscsi_msg->iscsi_info.initiator_iqn,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->mount_dev, iscsi_msg->iscsi_info.mount_dev,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->mount_options, iscsi_msg->iscsi_info.mount_options,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->vol_group, iscsi_msg->iscsi_info.vol_group,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+ memcpy(isp->vol_group_name, iscsi_msg->iscsi_info.vol_group_name,
+ ISCSI_MOUNT_DEV_NAME_LEN);
+}
+
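+/*
+ * Validate the XSMP header (length and message class) and dispatch the
+ * message by type. All handlers run under xsvnic_mutex.
+ */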
+static void handle_xsvnic_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data,
+ int length)
+{
+ int hlen;
+ struct xsmp_message_header *header = (struct xsmp_message_header *)data;
+ struct xsvnic_xsmp_msg *xmsgp =
+ (struct xsvnic_xsmp_msg *)(data + sizeof(*header));
+
+ XSMP_FUNCTION("%s:\n", __func__);
+
+ if (length < sizeof(*header)) {
+ XSMP_ERROR("%s:XSMP message too short: act length: %d\n",
+ __func__, length);
+ return;
+ }
+ hlen = be16_to_cpu(header->length);
+ if (hlen > length) {
+ XSMP_ERROR
+ ("%s:XSMP header length greater than payload length %d\n",
+ __func__, length);
+ return;
+ }
+ if (header->type != XSMP_MESSAGE_TYPE_VNIC) {
+ XSMP_ERROR("%s:XSMP message type not VNIC type: %d\n",
+ __func__, header->type);
+ return;
+ }
+
+ XSMP_INFO("%s: XSMP message type: %d\n", __func__, xmsgp->type);
+
+ mutex_lock(&xsvnic_mutex);
+
+ switch (xmsgp->type) {
+ case XSMP_XSVNIC_VLANIP:
+ xsvnic_handle_ip_req(xsmp_hndl, data, length);
+ break;
+ case XSMP_XSVNIC_INFO_REQUEST:
+ break;
+ case XSMP_XSVNIC_INSTALL:
+ xsvnic_counters[XSVNIC_VNIC_INSTALL_COUNTER]++;
+ xsvnic_xsmp_install(xsmp_hndl, xmsgp, data, length);
+ break;
+ case XSMP_XSVNIC_DELETE:
+ xsvnic_handle_del_message(xsmp_hndl, xmsgp);
+ xsvnic_counters[XSVNIC_VNIC_DEL_COUNTER]++;
+ break;
+ case XSMP_XSVNIC_UPDATE:
+ xsvnic_counters[XSVNIC_VNIC_UPDATE_COUNTER]++;
+ xsvnic_xsmp_update(xsmp_hndl, xmsgp);
+ break;
+ case XSMP_XSVNIC_SYNC_BEGIN:
+ xsvnic_counters[XSVNIC_VNIC_SYNC_BEGIN_COUNTER]++;
+ xsvnic_xsmp_sync_begin(xsmp_hndl, xmsgp);
+ break;
+ case XSMP_XSVNIC_SYNC_END:
+ xsvnic_counters[XSVNIC_VNIC_SYNC_END_COUNTER]++;
+ xsvnic_xsmp_sync_end(xsmp_hndl);
+ break;
+ case XSMP_XSVNIC_OPER_REQ:
+ xsvnic_counters[XSVNIC_VNIC_OPER_REQ_COUNTER]++;
+ (void)xsvnic_xsmp_handle_oper_req(xsmp_hndl,
+ be64_to_cpu(xmsgp->
+ resource_id));
+ break;
+ case XSMP_XSVNIC_ISCSI_INFO:
+ xsvnic_counters[XSVNIC_ISCSI_INFO_COUNTER]++;
+ xsvnic_process_iscsi_info(xsmp_hndl, data, length);
+ break;
+ default:
+ xsvnic_counters[XSVNIC_VNIC_UNSUP_XSMP_COUNTER]++;
+ XSMP_ERROR("%s: Unsupported VNIX XSMP message: %d\n",
+ __func__, xmsgp->type);
+ break;
+ }
+ mutex_unlock(&xsvnic_mutex);
+}
+
+static void handle_xsvnic_xsmp_messages_work(struct work_struct *work)
+{
+ struct xsvnic_work *xwork = container_of(work, struct xsvnic_work,
+ work);
+
+ handle_xsvnic_xsmp_messages(xwork->xsmp_hndl, xwork->msg,
+ xwork->len);
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+/*
+ * Called from thread context
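+ * (the XCPM receive path). The message is queued on xsvnic_wq; the
+ * work handler parses it and frees the buffer.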
+ */
+static void xsvnic_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *msg,
+ int length)
+{
+ struct xsvnic_work *work;
+ unsigned long flags;
+
+ XSMP_FUNCTION("%s:\n", __func__);
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ XSMP_ERROR("%s: Out of memory\n", __func__);
+ kfree(msg);
+ return;
+ }
+ INIT_WORK(&work->work, handle_xsvnic_xsmp_messages_work);
+ work->xsmp_hndl = xsmp_hndl;
+ work->msg = msg;
+ work->len = length;
+ spin_lock_irqsave(&xsvnic_lock, flags);
+ /*
+ * Do some checks here
+ * Add counter
+ */
+ queue_work(xsvnic_wq, &work->work);
+ spin_unlock_irqrestore(&xsvnic_lock, flags);
+}
+
+/*
+ * Needs to be called with xsvnic_mutex lock held
+ */
+static void xsvnic_wait_for_removal(xsmp_cookie_t xsmp_hndl)
+{
+ int is_pres;
+ struct xsvnic *xsvnicp;
+
+ while (1) {
+ is_pres = 0;
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl))
+ is_pres = 1;
+ }
+ if (is_pres) {
+ mutex_unlock(&xsvnic_mutex);
+ msleep(100);
+ mutex_lock(&xsvnic_mutex);
+ } else
+ break;
+ }
+}
+
+/*
+ * Called from thread context
+ */
+static void xsvnic_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+ struct xsvnic *xsvnicp;
+ unsigned long flags;
+
+ mutex_lock(&xsvnic_mutex);
+
+ switch (event) {
+ case XSCORE_PORT_UP:
+ case XSCORE_PORT_DOWN:
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info,
+ xsmp_hndl)) {
+ if (event == XSCORE_PORT_DOWN) {
+ set_bit(XSVNIC_IBLINK_DOWN,
+ &xsvnicp->state);
+ xsvnic_set_oper_down(xsvnicp, 1);
+ xsvnicp->counters
+ [XSVNIC_IBLINK_DOWN_COUNTER]++;
+ } else {
+ clear_bit(XSVNIC_IBLINK_DOWN,
+ &xsvnicp->state);
+ xsvnicp->counters
+ [XSVNIC_IBLINK_UP_COUNTER]++;
+ }
+ }
+ }
+ break;
+ case XSCORE_DEVICE_REMOVAL:
+ xsvnic_counters[XSVNIC_DEVICE_REMOVAL_COUNTER]++;
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info,
+ xsmp_hndl)) {
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ set_bit(XSVNIC_DELETING, &xsvnicp->state);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ }
+ }
+ /*
+ * Now wait for all the vnics to be deleted
+ */
+ xsvnic_wait_for_removal(xsmp_hndl);
+ break;
+ case XSCORE_CONN_CONNECTED:
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl))
+ xsvnicp->xsmp_hndl = xsmp_hndl;
+ }
+ break;
+ default:
+ break;
+ }
+
+ mutex_unlock(&xsvnic_mutex);
+}
+
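+/*
+ * Callout from the XCPM layer: reject the request if a vNIC with the
+ * same name already exists.
+ */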
+static int xsvnic_xsmp_callout_handler(char *name)
+{
+ struct xsvnic *xsvnicp;
+ int ret = 0;
+
+ mutex_lock(&xsvnic_mutex);
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ /* CHECK for duplicate name */
+ if (strcmp(xsvnicp->vnic_name, name) == 0) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ mutex_unlock(&xsvnic_mutex);
+ return ret;
+}
+
+int xsvnic_xsmp_init(void)
+{
+ struct xsmp_service_reg_info service_info = {
+ .receive_handler = xsvnic_receive_handler,
+ .event_handler = xsvnic_xsmp_event_handler,
+ .callout_handler = xsvnic_xsmp_callout_handler,
+ .ctrl_message_type = XSMP_MESSAGE_TYPE_VNIC,
+ .resource_flag_index = RESOURCE_FLAG_INDEX_VNIC
+ };
+
+ struct xsmp_service_reg_info service_info_ha = {
+ .ctrl_message_type = XSMP_MESSAGE_TYPE_SESSION
+ };
+
+ xsvnic_xsmp_service_id = xcpm_register_service(&service_info);
+ if (xsvnic_xsmp_service_id < 0)
+ return xsvnic_xsmp_service_id;
+
+ if (!xsvnic_havnic) {
+ service_info_ha.resource_flag_index = RESOURCE_FLAG_INDEX_NO_HA;
+ xsigo_session_service_id =
+ xcpm_register_service(&service_info_ha);
+ if (xsigo_session_service_id < 0)
+ return xsigo_session_service_id;
+ }
+
+ return 0;
+}
+
+void xsvnic_xsmp_exit(void)
+{
+ (void)xcpm_unregister_service(xsvnic_xsmp_service_id);
+ xsvnic_xsmp_service_id = -1;
+ if (!xsvnic_havnic) {
+ (void)xcpm_unregister_service(xsigo_session_service_id);
+ xsigo_session_service_id = -1;
+ }
+
+}
+
+int xsvnic_wait_for_first(void)
+{
+ int secs = xsvnic_wait_per_vnic;
+
+ /* Total wait is xsvnic_wait_for_vnic seconds */
+ mutex_lock(&xsvnic_mutex);
+ DRV_INFO("%s: Checking for first Vnic to be up\n", __func__);
+ while (list_empty(&xsvnic_list) && secs) {
+ mutex_unlock(&xsvnic_mutex);
+ msleep(1000);
+ secs--;
+ mutex_lock(&xsvnic_mutex);
+ }
+ mutex_unlock(&xsvnic_mutex);
+ DRV_INFO("%s: Finished Waiting for first Vnic to be up\n", __func__);
+ return secs > 0;
+}
+
+int xsvnic_all_up(void)
+{
+ int allup = 1;
+ struct xsvnic *xsvnicp;
+
+ mutex_lock(&xsvnic_mutex);
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state))
+ allup = 0;
+ }
+ mutex_unlock(&xsvnic_mutex);
+ return allup;
+}
+
+static int xsvnic_wait_for_all_vnics_up(void)
+{
+ int time, delayms = 200;
+
+ /* Total wait is xsvnic_wait_for_vnic seconds */
+ DRV_INFO("%s: Checking for VNIC's to be up\n", __func__);
+ for (time = 0; time < xsvnic_wait_per_vnic * 1000; time += delayms) {
+ if (xsvnic_all_up()) {
+ DRV_INFO("%s: VNIC's are up\n", __func__);
+ return 1;
+ }
+ msleep(delayms);
+ }
+ DRV_INFO("%s: VNIC's are not up\n", __func__);
+ return 0;
+}
+
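+/*
+ * Optionally hold up module load (xsvnic_wait_in_boot) until the first
+ * vNIC appears and all vNICs report operational up, bounded by
+ * xsvnic_wait_per_vnic seconds per stage. The elapsed time is recorded
+ * in xsvnic_wait_time and reported via the /proc debug entry.
+ */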
+static void xsvnic_wait_for_vnics(void)
+{
+ unsigned long wait_time = jiffies;
+
+ if (xsvnic_wait_in_boot && xscore_wait_for_sessions(0)) {
+ pr_info("XSVNIC: Waiting for VNIC's to come up .....\n");
+ if (xsvnic_wait_for_first())
+ xsvnic_wait_for_all_vnics_up();
+ else
+ DRV_INFO("%s: No VNIC's present\n", __func__);
+ }
+ xsvnic_wait_time = jiffies - wait_time;
+}
+
+/*
+ * Module initialization entry point
+ */
+
+static int __init xsvnic_init(void)
+{
+ int ret;
+
+ DRV_FUNCTION("%s\n", __func__);
+
+ spin_lock_init(&xsvnic_lock);
+ INIT_LIST_HEAD(&xsvnic_list);
+ mutex_init(&xsvnic_mutex);
+ xsvnic_wq = create_singlethread_workqueue("xsv_wq");
+ if (!xsvnic_wq) {
+ DRV_ERROR("%s: create_singlethread_workqueue failed\n",
+ __func__);
+ return -ENOMEM;
+ }
+ xsvnic_io_wq = create_singlethread_workqueue("xsviowq");
+ if (!xsvnic_io_wq) {
+ DRV_ERROR("%s: create_singlethread_workqueue failed\n",
+ __func__);
+ ret = -ENOMEM;
+ goto io_wq_error;
+ }
+ ret = xsvnic_create_procfs_root_entries();
+ if (ret) {
+ DRV_ERROR("%s: xsvnic_create_procfs_root_entries failed %d\n",
+ __func__, ret);
+ goto proc_error;
+ }
+ ret = xsvnic_xsmp_init();
+ if (ret) {
+ DRV_ERROR("%s: xsvnic_xsmp_init failed %d\n", __func__, ret);
+ goto xsmp_err;
+ }
+ /* Wait for VNIC's to come up */
+ xsvnic_wait_for_vnics();
+ return ret;
+
+xsmp_err:
+ xsvnic_remove_procfs_root_entries();
+proc_error:
+ destroy_workqueue(xsvnic_io_wq);
+io_wq_error:
+ destroy_workqueue(xsvnic_wq);
+ return ret;
+}
+
+static void __exit xsvnic_exit(void)
+{
+ struct xsvnic *xsvnicp;
+ unsigned long flags;
+
+ DRV_FUNCTION("%s\n", __func__);
+ xsvnic_xsmp_exit();
+ mutex_lock(&xsvnic_mutex);
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ spin_lock_irqsave(&xsvnicp->lock, flags);
+ set_bit(XSVNIC_DELETING, &xsvnicp->state);
+ set_bit(XSVNIC_SHUTDOWN, &xsvnicp->state);
+ spin_unlock_irqrestore(&xsvnicp->lock, flags);
+ }
+ while (!list_empty(&xsvnic_list)) {
+ mutex_unlock(&xsvnic_mutex);
+ msleep(100);
+ mutex_lock(&xsvnic_mutex);
+ }
+ mutex_unlock(&xsvnic_mutex);
+ flush_workqueue(xsvnic_wq);
+ destroy_workqueue(xsvnic_wq);
+ flush_workqueue(xsvnic_io_wq);
+ destroy_workqueue(xsvnic_io_wq);
+ xsvnic_remove_procfs_root_entries();
+}
+
+int xsvnic_iscsi_present(void)
+{
+ int pres = 0;
+ struct xsvnic *xsvnicp;
+
+ mutex_lock(&xsvnic_mutex);
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (xsvnicp->iscsi_boot_info.initiator_iqn[0] != '\0')
+ pres = 1;
+ }
+ mutex_unlock(&xsvnic_mutex);
+ return pres;
+}
+EXPORT_SYMBOL(xsvnic_iscsi_present);
+
+int xsvnic_get_all_names(char **names, int max)
+{
+ struct xsvnic *xsvnicp;
+ int count = 0;
+
+ mutex_lock(&xsvnic_mutex);
+ list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+ if (count < max)
+ names[count++] =
+ kstrdup(xsvnicp->vnic_name, GFP_KERNEL);
+ }
+ mutex_unlock(&xsvnic_mutex);
+ return count;
+}
+EXPORT_SYMBOL(xsvnic_get_all_names);
+
+module_init(xsvnic_init);
+module_exit(xsvnic_exit);
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+
+#include "xsvnic.h"
+
+static int xs_seq_file;
+module_param(xs_seq_file, int, 0644);
+
+MODULE_PARM_DESC(xs_seq_file,
+ "Enabling the sequence files to print large data in /proc entries");
+
+static char *glob_counter_name[XSVNIC_MAX_GLOB_COUNTERS] = {
+ "sync end del count:\t\t",
+ "vnic install count:\t\t",
+ "vnic del count:\t\t\t",
+ "vnic del novid count:\t\t",
+ "vnic update count:\t\t",
+ "vnic sync begin count:\t\t",
+ "vnic sync end count:\t\t",
+ "vnic oper req count:\t\t",
+ "vnic unsup cmd count:\t\t",
+ "iscsi info count:\t\t",
+ "xscore device remove count:\t",
+};
+
+static char *counter_name[XSVNIC_MAX_COUNTERS] = {
+ "ctrl_heartbeat_count:\t\t",
+ "data_heartbeat_count:\t\t",
+ "hbeat send error count:\t\t",
+ "napi_poll_count:\t\t",
+ "short_tx_pkt_count:\t\t",
+ "tx_skb_count:\t\t\t",
+ "tx_skb_tso_count:\t\t",
+ "tx_skb_noheadroom_count:\t",
+ "tx skb free count:\t\t",
+ "tx skb free count (reaped):\t",
+ "tx head expand count:\t\t",
+ "tx head expand error count:\t",
+ "tx vlan count:\t\t\t",
+ "tx error count:\t\t\t",
+ "tx wrb exhaust:\t\t\t",
+ "tx drop oper down count:\t",
+ "tx drop skb error count:\t",
+ "tx skb expand error count:\t",
+ "tx drop ring full count:\t",
+ "rx_skb_count:\t\t\t",
+ "rx_skb_alloc_count:\t\t",
+ "rx_skb_sendtovlangrp:\t\t",
+ "rx_skb_batched_count:\t\t",
+ "rx_skb_freed_count:\t\t",
+ "rx_bat_maxsegs_count:\t\t",
+ "rx_bat_numsegs_below_5:\t\t",
+ "rx_bat_numsegs_between_5_10:\t",
+ "rx_bat_numsegs_between_10_20:\t",
+ "rx_bat_numsegs_above_20:\t",
+ "rx_bat_8k_segs_count:\t\t",
+ "rx skb offload count:\t\t",
+ "rx skb offl frag count:\t\t",
+ "rx skb offlnonipv4 count:\t",
+ "rx error count:\t\t\t",
+ "rx quota exceeded count:\t",
+ "rx no buf count:\t\t",
+ "rx max packet:\t\t\t",
+ "rx min packet:\t\t\t",
+ "rx lro Aggregated Packet count:\t",
+ "rx lro Flushed count:\t\t",
+ "rx lro Average Aggregated Count:\t",
+ "rx lro No Descriptor Count:\t",
+ "tx max packet:\t\t\t",
+ "tx min packet:\t\t\t",
+ "tx max time spent:\t\t",
+ "tx min time spent:\t\t",
+ "napi sched count:\t\t",
+ "napi notsched count:\t\t",
+ "io port up count:\t\t",
+ "io port down count:\t\t",
+ "io dup port up count:\t\t",
+ "io dup port down count:\t\t",
+ "start rx sent count:\t\t",
+ "stop rx sent count:\t\t",
+ "start rx resp count:\t\t",
+ "rx bad resp count:\t\t",
+ "open count:\t\t\t",
+ "stop count:\t\t\t",
+ "getstats count:\t\t\t",
+ "set mcast count:\t\t",
+ "multicast resp count:\t\t",
+ "multicast no resp count:\t",
+ "vlan add count:\t\t\t",
+ "vlan del count:\t\t\t",
+ "ioctl count:\t\t\t",
+ "mac addr change:\t\t",
+ "wdog timeout count:\t\t",
+ "oper req count:\t\t\t",
+ "xt down count:\t\t\t",
+ "xt update count:\t\t",
+ "xt lid change count:\t\t",
+ "admin up count:\t\t\t",
+ "admin down count:\t\t",
+ "sm poll count:\t\t\t",
+ "qp error count:\t\t\t",
+ "IB recovery count:\t\t",
+ "IB recovered count:\t\t",
+ "IB link down count:\t\t",
+ "IB link up count:\t\t",
+ "ctrl conn ok count:\t\t",
+ "ctrl rdisc count:\t\t",
+ "ctrl conn err count:\t\t",
+ "ctrl recv err count:\t\t",
+ "data conn ok count:\t\t",
+ "data rdisc count:\t\t",
+ "data conn err count:\t\t",
+ "sent oper up count:\t\t",
+ "sent oper down count:\t\t",
+ "sent oper state failure count:\t",
+ "sent oper state success count:\t",
+ "drop rx standby count:\t\t",
+ "drop tx standby count:\t\t",
+};
+
+#define atoi(str) simple_strtoul((((str) != NULL) ? (str) : ""), NULL, 0)
+#define XS_RESCHED_NAPI "napi_sched"
+#define XS_READIB_BUF "read_ibbuf"
+#define XS_RXBATCHING_ON "rbatch on"
+#define XS_RXBATCHING_OFF "rbatch off"
+#define XS_SLAVE_ACTIVE "slave active"
+#define XS_SLAVE_BACKUP "slave backup"
+
+struct proc_dir_entry *proc_root_xsvnic = NULL;
+struct proc_dir_entry *proc_root_xsvnic_dev = NULL;
+struct proc_dir_entry *iscsi_boot = NULL;
+
+static ssize_t xsvnic_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int xsvnic_proc_read_debug(struct seq_file *m, void *data);
+static int xsvnic_proc_open_debug(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_iscsi_boot(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xsvnic_proc_read_iscsi_boot(struct seq_file *m, void *data);
+static int xsvnic_proc_open_iscsi_boot(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xsvnic_proc_read_device(struct seq_file *m, void *data);
+static int xsvnic_proc_open_device(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_device_counters(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xsvnic_proc_read_device_counters(struct seq_file *m, void *data);
+static int xsvnic_proc_open_device_counters(struct inode *inode,
+ struct file *file);
+static void *xsvnic_seq_start(struct seq_file *seq, loff_t *pos);
+static void *xsvnic_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+static int xsvnic_seq_show(struct seq_file *seq, void *v);
+static void xsvnic_seq_stop(struct seq_file *seq, void *v);
+static int xsvnic_open(struct inode *inode, struct file *file);
+
+static const struct file_operations xsvnic_debug_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xsvnic_proc_open_debug,
+ .read = seq_read,
+ .write = xsvnic_proc_write_debug,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xsvnic_iscsi_boot_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xsvnic_proc_open_iscsi_boot,
+ .read = seq_read,
+ .write = xsvnic_proc_write_iscsi_boot,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xsvnic_device_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xsvnic_proc_open_device,
+ .read = seq_read,
+ .write = xsvnic_proc_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xsvnic_device_counters_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xsvnic_proc_open_device_counters,
+ .read = seq_read,
+ .write = xsvnic_proc_write_device_counters,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct seq_operations xsvnic_seq_ops = {
+ .start = xsvnic_seq_start,
+ .next = xsvnic_seq_next,
+ .stop = xsvnic_seq_stop,
+ .show = xsvnic_seq_show
+};
+
+static const struct file_operations xsvnic_file_ops = {
+ .owner = THIS_MODULE,
+ .open = xsvnic_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static int xsvnic_proc_read_device(struct seq_file *m, void *data)
+{
+ struct xsvnic *vp = m->private;
+ unsigned long tsecs = 0, tmins = 0, thrs = 0;
+ char tmp_buf[512];
+
+ seq_printf(m, "Admin state:\t\t\t%s\n",
+ test_bit(XSVNIC_CHASSIS_ADMIN_UP,
+ &vp->state) ? "Up" : "Down");
+ seq_printf(m, "Chassis Name:\t\t\t%s\n", vp->xsmp_info.chassis_name);
+ seq_printf(m, "Chassis Version:\t\t%x\n", vp->xsmp_info.version);
+ seq_printf(m, "Server-Profile Name:\t\t%s\n",
+ vp->xsmp_info.session_name);
+ seq_puts(m, "Config parameters:\n");
+ seq_printf(m, "TCA GUID:\t\t\t0x%Lx\n", vp->tca_guid);
+ seq_printf(m, "TCA lid:\t\t\t0x%x\n", vp->tca_lid);
+ seq_printf(m, "MAC addr:\t\t\t0x%Lx\n", vp->mac);
+ seq_printf(m, "VID:\t\t\t\t0x%Lx\n", vp->resource_id);
+ seq_printf(m, "mtu:\t\t\t\t%d\n", vp->mtu);
+ seq_printf(m, "ring size:\t\t\t%d\n", vp->rx_ring_size);
+ seq_printf(m, "bandwidth:\t\t\t%d\n", vp->bandwidth);
+ seq_puts(m, "\n");
+ seq_printf(m, "link/xsmp hndl:\t\t\t%p\n", vp->xsmp_hndl);
+ seq_printf(m, "Port link state: \t\t%s\n",
+ test_bit(XSVNIC_PORT_LINK_UP, &vp->state) ? "Up" : "Down");
+ seq_printf(m, "Port link speed: \t\t%d Mbps\n", vp->port_speed);
+
+ strcpy(tmp_buf, "None");
+ if (vp->mp_flag & MP_XSVNIC_PRIMARY) {
+ strcpy(tmp_buf, "Primary");
+ if (vp->mp_flag & MP_XSVNIC_AUTO_SWITCH)
+ strcat(tmp_buf, " + AutoSwitchover");
+ } else if (vp->mp_flag & MP_XSVNIC_SECONDARY) {
+ strcpy(tmp_buf, "Secondary");
+ if (vp->mp_flag & MP_XSVNIC_AUTO_SWITCH)
+ strcat(tmp_buf, " + AutoSwitchover");
+ }
+
+ seq_printf(m, "HA flags:\t\t\t%s\n", tmp_buf);
+
+ seq_printf(m, "netdev features:\t\t0x%x\n", (u32) vp->netdev->features);
+
+ seq_printf(m, "Checksum offload:\t\t%s\n",
+ (vp->install_flag &
+ (XSVNIC_INSTALL_TCP_OFFL | XSVNIC_INSTALL_UDP_OFFL))
+ ? "Enabled" : "Disabled");
+
+ seq_printf(m, "TSO:\t\t\t\t%s\n",
+ (vp->netdev->
+ features & NETIF_F_TSO) ? "Enabled" : "Disabled");
+
+ seq_printf(m, "LRO:\t\t\t\t%s\n",
+ (vp->lro_mode) ? "Enabled" : "Disabled");
+
+ seq_printf(m, "RX batching :\t\t\t%s\n",
+ (vp->is_rxbatching) ? "Enabled" : "Disabled");
+
+ seq_printf(m, "8k IB mtu :\t\t\t%s\n",
+ ((vp->install_flag & XSVNIC_8K_IBMTU)
+ && vp->xsmp_info.is_shca)
+ ? "Enabled" : "Disabled");
+ seq_printf(m, "VLAN offload :\t\t\t%s\n",
+ (xsvnic_vlanaccel != 0) ? "Enabled" : "Disabled");
+ seq_printf(m, "vlan count:\t\t\t%d\n", vp->vlan_count);
+ seq_printf(m, "mcast count:\t\t\t%d (promisc: %s)\n",
+ vp->mc_count, vp->iff_promisc ? "on" : "off");
+
+ seq_printf(m,
+ "Data Connection:\t\t%s (%d), Mode: %s InterruptMode for TX: %s RX: %s\n",
+ vp->data_conn.state ==
+ XSVNIC_CONN_CONNECTED ? "Connected" : "Not connected",
+ vp->data_conn.state,
+ vp->data_conn.ctx.
+ features & XSCORE_USE_CHECKSUM ? "Checksum" : "ICRC",
+ vp->data_conn.ctx.
+ features & XSCORE_NO_SEND_COMPL_INTR ? "Disabled" :
+ "Enabled",
+ vp->data_conn.ctx.
+ features & XSCORE_NO_RECV_COMPL_INTR ? "Disabled" :
+ "Enabled");
+
+ seq_printf(m, "Control Connection:\t\t%s (%d), Mode: %s\n",
+ vp->ctrl_conn.state == XSVNIC_CONN_CONNECTED ?
+ "Connected" : "Not connected", vp->ctrl_conn.state,
+ vp->ctrl_conn.ctx.
+ features & XSCORE_USE_CHECKSUM ? "Checksum" : "ICRC");
+ seq_puts(m, "Interrupt Coalescing parameters\n");
+ seq_printf(m, "TX:\t\t\t\t MaxUSeconds: %d MaxFrames: %d\n",
+ vp->data_conn.ctx.tx_coalesce_usecs,
+ vp->data_conn.ctx.tx_max_coalesced_frames);
+ seq_printf(m, "RX:\t\t\t\t MaxUSeconds: %d MaxFrames: %d\n",
+ vp->data_conn.ctx.rx_coalesce_usecs,
+ vp->data_conn.ctx.rx_max_coalesced_frames);
+
+ if (vp->data_conn.state == XSVNIC_CONN_CONNECTED &&
+ vp->ctrl_conn.state == XSVNIC_CONN_CONNECTED) {
+ int lqpn, dqpn;
+
+ tsecs = jiffies_to_msecs(jiffies - vp->jiffies) / 1000;
+ thrs = tsecs / (60 * 60);
+ tmins = (tsecs / 60 - (thrs * 60));
+ tsecs = tsecs - (tmins * 60) - (thrs * 60 * 60);
+
+ lqpn = vp->ctrl_conn.ctx.local_qpn;
+ dqpn = vp->ctrl_conn.ctx.remote_qpn;
+ seq_printf(m,
+ "Ctrl QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+ lqpn, lqpn, dqpn, dqpn);
+
+ lqpn = vp->data_conn.ctx.local_qpn;
+ dqpn = vp->data_conn.ctx.remote_qpn;
+ seq_printf(m,
+ "Data QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+ lqpn, lqpn, dqpn, dqpn);
+ }
+ seq_printf(m, "XSVNIC Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+ thrs, tmins, tsecs);
+ seq_puts(m, "\n");
+
+ seq_puts(m, "Operational state:\n");
+ if (vp->mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+ seq_printf(m, "HA VNIC state:\t\t\t%s\n",
+ vp->ha_state ==
+ XSVNIC_HA_STATE_STANDBY ? "Standby" : "Active");
+ seq_printf(m, "HA Active State:\t\t%s\n",
+ test_bit(XSVNIC_STATE_STDBY,
+ &vp->
+ state) ? XS_SLAVE_BACKUP : XS_SLAVE_ACTIVE);
+ }
+
+ seq_printf(m, "Netdev state:\t\t\t0x%lu\n", vp->netdev->state);
+ seq_printf(m, "Netdev napi state:\t\t0x%lu\n", vp->napi.state);
+
+ tmp_buf[0] = 0;
+ if (netif_running(vp->netdev))
+ strcat(tmp_buf, "netdev running");
+ else
+ strcat(tmp_buf, "netif not running");
+ if (netif_queue_stopped(vp->netdev))
+ strcat(tmp_buf, " + netdev stopped");
+ else
+ strcat(tmp_buf, " + netdev not stopped");
+
+ seq_printf(m, "%s\n\n", tmp_buf);
+
+ seq_printf(m, "Carrier state:\t\t\t%s\n",
+ netif_carrier_ok(vp->netdev) ? "Up" : "Down");
+
+ seq_printf(m, "VNIC up:\t\t\t%s\n",
+ test_bit(XSVNIC_OPER_UP, &vp->state) ? "Yes" : "No");
+
+ seq_printf(m, "VNIC state:\t\t\t0x%x\n", (unsigned int)vp->state);
+ tmp_buf[0] = 0;
+ if (test_bit(XSVNIC_OPER_UP, &vp->state))
+ strcat(tmp_buf, "Oper Up");
+ else
+ strcat(tmp_buf, "Oper Down");
+ if (test_bit(XSVNIC_OS_ADMIN_UP, &vp->state))
+ strcat(tmp_buf, " + OS Admin Up");
+ else
+ strcat(tmp_buf, " + OS Admin Down");
+ if (test_bit(XSVNIC_CHASSIS_ADMIN_UP, &vp->state))
+ strcat(tmp_buf, " + Chassis Admin Up");
+ else
+ strcat(tmp_buf, " + Chassis Admin Down");
+ if (test_bit(XSVNIC_PORT_LINK_UP, &vp->state))
+ strcat(tmp_buf, " + Port Link Up");
+ else
+ strcat(tmp_buf, " + Port Link Down");
+ if (test_bit(XSVNIC_START_RX_SENT, &vp->state))
+ strcat(tmp_buf, " + Start Rx Sent");
+ else
+ strcat(tmp_buf, " + No Start Rx");
+ if (test_bit(XSVNIC_START_RESP_RCVD, &vp->state))
+ strcat(tmp_buf, " + Start Rx Resp Rcvd");
+ else
+ strcat(tmp_buf, " + No Start Rx Resp");
+
+ if (test_bit(XSVNIC_INTR_ENABLED, &vp->state))
+ strcat(tmp_buf, " + Rx Intr Enabled");
+ else
+ strcat(tmp_buf, " + Rx Intr Disabled");
+
+ if (test_bit(XSVNIC_RX_NOBUF, &vp->state))
+ strcat(tmp_buf, " + Rx No Buf");
+
+ if (test_bit(XSVNIC_XT_DOWN, &vp->state))
+ strcat(tmp_buf, " + XT Down");
+
+ if (test_bit(XSVNIC_IBLINK_DOWN, &vp->state))
+ strcat(tmp_buf, " + IB Link Down");
+
+ if (test_bit(XSVNIC_OVER_QUOTA, &vp->state))
+ strcat(tmp_buf, " + No RX Quota");
+
+ seq_printf(m, "%s\n\n", tmp_buf);
+
+ /* Get LRO statistics */
+ if (vp->lro_mode) {
+ vp->counters[XSVNIC_RX_LRO_AGGR_PKTS] +=
+ vp->lro.lro_mgr.stats.aggregated;
+ vp->counters[XSVNIC_RX_LRO_FLUSHED_PKT] +=
+ vp->lro.lro_mgr.stats.flushed;
+ if (vp->lro.lro_mgr.stats.flushed)
+ vp->counters[XSVNIC_RX_LRO_AVG_AGGR_PKTS] +=
+ vp->lro.lro_mgr.stats.aggregated /
+ vp->lro.lro_mgr.stats.flushed;
+ else
+ vp->counters[XSVNIC_RX_LRO_AVG_AGGR_PKTS] = 0;
+ vp->counters[XSVNIC_RX_LRO_NO_DESCRIPTORS] +=
+ vp->lro.lro_mgr.stats.no_desc;
+ }
+
+ seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+ return 0;
+}
+
+static ssize_t xsvnic_proc_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ struct xsvnic *vp = PDE_DATA(file_inode(file));
+ int ret;
+ char buf[64];
+ char action[64];
+
+ if (count == 0 || count >= sizeof(buf))
+ return -EINVAL;
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ ret = sscanf(buf, "%s", action);
+ if (ret != 1)
+ return -EINVAL;
+
+ if ((strlen(action) == 1) && (atoi(action) == 0)) {
+ /* Clear counters */
+ memset(vp->counters, 0, sizeof(vp->counters));
+ vp->counters_cleared++;
+ return count;
+ }
+
+ /*
+ * sscanf does not capture strings with embedded spaces such as
+ * "rbatch on", so keep a raw copy of the command as well.
+ */
+ memset(action, 0, sizeof(action));
+ strncpy(action, buf, 12);
+
+ if (strcmp(action, XS_RESCHED_NAPI) == 0)
+ set_bit(XSVNIC_TRIGGER_NAPI_SCHED, &vp->state);
+ else if (strcmp(action, XS_READIB_BUF) == 0) {
+ struct xscore_buf_info binfo;
+ struct xscore_conn_ctx *ctx = &vp->data_conn.ctx;
+
+ ret = xscore_read_buf(ctx, &binfo);
+ if (ret != 1 || binfo.status)
+ pr_info("xsvnic: %s No data found, status %d\n",
+ vp->vnic_name, binfo.status);
+ else {
+ pr_info("xsvnic: %s", vp->vnic_name);
+ pr_info("Data found ");
+ pr_info("status %d", binfo.status);
+ pr_info("length %d\n", binfo.sz);
+ dev_kfree_skb_any(binfo.cookie);
+ }
+ } else if (strncmp(action, XS_RXBATCHING_ON, 9) == 0) {
+ ret = xsvnic_change_rxbatch(vp, 1);
+ if (ret != 1)
+ pr_info("xsvnic: %s Cannot turn on rx batching %x\n",
+ vp->vnic_name, ret);
+ } else if (strcmp(action, XS_RXBATCHING_OFF) == 0) {
+ ret = xsvnic_change_rxbatch(vp, 0);
+ if (ret != 1)
+ pr_info("xsvnic: %s Cannot turn off rx batching %x\n",
+ vp->vnic_name, ret);
+ } else if (strcmp(action, XS_SLAVE_ACTIVE) == 0) {
+ pr_info("%s XSVNIC[%s] Setting as active slave\n", __func__,
+ vp->vnic_name);
+ clear_bit(XSVNIC_STATE_STDBY, &vp->state);
+ } else if (strcmp(action, XS_SLAVE_BACKUP) == 0) {
+ pr_info("%s XSVNIC[%s] Setting as standby slave\n",
+ __func__, vp->vnic_name);
+ set_bit(XSVNIC_STATE_STDBY, &vp->state);
+ } else {
+ pr_info("xsvnic: %s echo'ing %s is not valid\n",
+ vp->vnic_name, action);
+ }
+
+ return count;
+}
+
+static int xsvnic_proc_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, xsvnic_proc_read_device,
+ PDE_DATA(file_inode(file)));
+}
+
+static int xsvnic_proc_read_device_counters(struct seq_file *m, void *data)
+{
+ struct xsvnic *vp = m->private;
+ int i;
+
+ for (i = 0; i < XSVNIC_MAX_COUNTERS; i++)
+ seq_printf(m, "%s%u\n", counter_name[i], vp->counters[i]);
+ seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+ return 0;
+}
+
+static ssize_t xsvnic_proc_write_device_counters(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ struct xsvnic *vp = PDE_DATA(file_inode(file));
+ char buf[64];
+ char action[64];
+ int ret;
+
+ if (count == 0 || count >= sizeof(buf))
+ return -EINVAL;
+ if (copy_from_user(buf, buffer, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ ret = sscanf(buf, "%s", action);
+ if (ret != 1)
+ return -EINVAL;
+ if ((strlen(action) == 1) && (atoi(action) == 0)) {
+ /* Clear counters */
+ memset(vp->counters, 0, sizeof(vp->counters));
+ vp->counters_cleared++;
+ }
+ return count;
+}
+
+static int xsvnic_proc_open_device_counters(struct inode *inode,
+ struct file *file)
+{
+ return single_open(file, xsvnic_proc_read_device_counters,
+ PDE_DATA(inode));
+}
+
+static void *xsvnic_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return (*pos < XSVNIC_MAX_COUNTERS) ? &counter_name[*pos] : 0;
+}
+
+static void *xsvnic_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return (*pos < XSVNIC_MAX_COUNTERS) ? &counter_name[*pos] : 0;
+}
+
+static int xsvnic_seq_show(struct seq_file *seq, void *v)
+{
+ struct xsvnic *vp = seq->private;
+
+ if (vp->ix == XSVNIC_MAX_COUNTERS)
+ vp->ix = 0;
+
+ seq_printf(seq, "%s %u\n", counter_name[vp->ix], vp->counters[vp->ix]);
+ vp->ix++;
+
+ return 0;
+}
+
+static void xsvnic_seq_stop(struct seq_file *seq, void *v)
+{
+ /* Nothing to be done here */
+}
+
+static int xsvnic_open(struct inode *inode, struct file *sfile)
+{
+ struct seq_file *seq;
+ int ret_val;
+
+ ret_val = seq_open(sfile, &xsvnic_seq_ops);
+ if (!ret_val) {
+ /* recover the pointer buried in proc_dir_entry data */
+ seq = sfile->private_data;
+ seq->private = PDE_DATA(inode);
+ }
+
+ return ret_val;
+}
+
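+/*
+ * Create /proc/driver/xsvnic/devices/<vnic>/ with a device status file
+ * (named after the vNIC) and a "counters" file. The counters file uses
+ * either single_open() or the seq_file iterator, depending on the
+ * xs_seq_file module parameter.
+ */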
+int xsvnic_add_proc_entry(struct xsvnic *vp)
+{
+ struct proc_dir_entry *file, *counter;
+
+ vp->vnic_dir = proc_mkdir(vp->vnic_name, proc_root_xsvnic_dev);
+
+ file = proc_create_data(vp->vnic_name, S_IFREG, vp->vnic_dir,
+ &xsvnic_device_proc_fops, vp);
+ if (!file) {
+ pr_info("Unable to create the xsvnic /proc entry\n");
+ return -ENOMEM;
+ }
+ if (xs_seq_file) {
+ /* Using seq_file for OVM */
+ counter = proc_create_data("counters", S_IFREG, vp->vnic_dir,
+ &xsvnic_file_ops, vp);
+ } else {
+ counter = proc_create_data("counters", S_IFREG, vp->vnic_dir,
+ &xsvnic_device_counters_proc_fops, vp);
+ }
+
+ if (!counter) {
+ pr_info("Unable to create the xsvnic /proc entry\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void xsvnic_remove_proc_entry(struct xsvnic *vp)
+{
+ remove_proc_entry(vp->vnic_name, vp->vnic_dir);
+ remove_proc_entry("counters", vp->vnic_dir);
+ remove_proc_entry(vp->vnic_name, proc_root_xsvnic_dev);
+}
+
+static ssize_t xsvnic_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ int newval, ret;
+ char *buf = (char *)__get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EINVAL;
+ if (count == 0 || count >= PAGE_SIZE)
+ goto out;
+ if (copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ if (kstrtoint(buf, 0, &newval))
+ goto out;
+ xsvnic_debug = newval;
+ ret = count;
+out:
+ free_page((unsigned long)buf);
+ return ret;
+}
+
+static int xsvnic_proc_read_debug(struct seq_file *m, void *data)
+{
+ int i;
+
+ seq_printf(m, "Total Wait time(secs): %ld\n", (xsvnic_wait_time / HZ));
+ seq_printf(m, "Debug bitmask : 0x%x\n\n", xsvnic_debug);
+ for (i = 0; i < XSVNIC_MAX_GLOB_COUNTERS; i++)
+ seq_printf(m, "%s%d\n", glob_counter_name[i],
+ xsvnic_counters[i]);
+ return 0;
+}
+
+static int xsvnic_proc_open_debug(struct inode *inode, struct file *file)
+{
+ return single_open(file, xsvnic_proc_read_debug, PDE_DATA(inode));
+}
+
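+/*
+ * Emit the cached iSCSI boot parameters (target, initiator IQN, IP
+ * configuration and SAN mount info) in key=value form for every vNIC
+ * that carries boot information.
+ */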
+static int xsvnic_proc_read_iscsi_boot(struct seq_file *m, void *data)
+{
+ struct xsvnic *vp;
+
+ mutex_lock(&xsvnic_mutex);
+
+ list_for_each_entry(vp, &xsvnic_list, xsvnic_list) {
+ if (vp->iscsi_boot_info.initiator_iqn[0] == '\0')
+ continue;
+ seq_printf(m, "iscsiserver=%d.%d.%d.%d:%d\n",
+ (vp->iscsi_boot_info.target_ip_address >> 24) & 0xff,
+ (vp->iscsi_boot_info.target_ip_address >> 16) & 0xff,
+ (vp->iscsi_boot_info.target_ip_address >> 8) & 0xff,
+ (vp->iscsi_boot_info.target_ip_address >> 0) & 0xff,
+ vp->iscsi_boot_info.port);
+ seq_printf(m, "iscsiinitiator=%s\n",
+ vp->iscsi_boot_info.initiator_iqn);
+ seq_printf(m, "iscsitarget=%s:%d\n",
+ vp->iscsi_boot_info.target_iqn,
+ vp->iscsi_boot_info.lun);
+
+ if (vp->iscsi_boot_info.ip_addr == 0)
+ seq_printf(m, "iscsiboot=%s\n",
+ vp->iscsi_boot_info.vnic_name);
+ else {
+ seq_printf(m,
+ "iscsiboot=%s:%d.%d.%d.%d:%d.%d.%d.%d:%d.%d.%d.%d:%d.%d.%d.%d\n",
+ vp->iscsi_boot_info.vnic_name,
+ (vp->iscsi_boot_info.ip_addr >> 24) & 0xff,
+ (vp->iscsi_boot_info.ip_addr >> 16) & 0xff,
+ (vp->iscsi_boot_info.ip_addr >> 8) & 0xff,
+ (vp->iscsi_boot_info.ip_addr >> 0) & 0xff,
+ (vp->iscsi_boot_info.netmask >> 24) & 0xff,
+ (vp->iscsi_boot_info.netmask >> 16) & 0xff,
+ (vp->iscsi_boot_info.netmask >> 8) & 0xff,
+ (vp->iscsi_boot_info.netmask >> 0) & 0xff,
+ (vp->iscsi_boot_info.
+ gateway_ip_address >> 24) & 0xff,
+ (vp->iscsi_boot_info.
+ gateway_ip_address >> 16) & 0xff,
+ (vp->iscsi_boot_info.
+ gateway_ip_address >> 8) & 0xff,
+ (vp->iscsi_boot_info.
+ gateway_ip_address >> 0) & 0xff,
+ (vp->iscsi_boot_info.
+ dns_ip_address >> 24) & 0xff,
+ (vp->iscsi_boot_info.
+ dns_ip_address >> 16) & 0xff,
+ (vp->iscsi_boot_info.
+ dns_ip_address >> 8) & 0xff,
+ (vp->iscsi_boot_info.
+ dns_ip_address >> 0) & 0xff);
+ }
+
+ if (vp->iscsi_boot_info.mount_type == SAN_MOUNT_TYPE_LVM) {
+ if (vp->iscsi_boot_info.vol_group[0] != '\0')
+ seq_printf(m, "sanmount=lvm:%s:%s\n",
+ vp->iscsi_boot_info.vol_group,
+ vp->iscsi_boot_info.vol_group_name);
+ } else if (vp->iscsi_boot_info.mount_type ==
+ SAN_MOUNT_TYPE_DIRECT) {
+ /* direct mount device */
+ if (vp->iscsi_boot_info.mount_dev[0] != '\0')
+ seq_printf(m, "sanmount=%s\n",
+ vp->iscsi_boot_info.mount_dev);
+ }
+ seq_printf(m, "iscsitpg=%s\n",
+ vp->iscsi_boot_info.target_portal_group);
+ }
+
+ mutex_unlock(&xsvnic_mutex);
+
+ return 0;
+}
+
+static ssize_t xsvnic_proc_write_iscsi_boot(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+/* Not implemented (dummy write) */
+ return count;
+}
+
+static int xsvnic_proc_open_iscsi_boot(struct inode *inode, struct file *file)
+{
+ return single_open(file, xsvnic_proc_read_iscsi_boot, PDE_DATA(inode));
+}
+
+int xsvnic_create_procfs_root_entries(void)
+{
+ struct proc_dir_entry *debug_file;
+ int ret = 0;
+
+ proc_root_xsvnic = proc_mkdir("driver/xsvnic", NULL);
+ if (!proc_root_xsvnic) {
+ pr_info("Unable to create /proc/driver/xsvnic\n");
+ return -ENOMEM;
+ }
+ proc_root_xsvnic_dev = proc_mkdir("devices", proc_root_xsvnic);
+ if (!proc_root_xsvnic_dev) {
+ pr_info("Unable to create /proc/driver/xsvnic/devices\n");
+ ret = -ENOMEM;
+ goto create_proc_end_1;
+ }
+ debug_file = proc_create_data("debug", S_IFREG, proc_root_xsvnic,
+ &xsvnic_debug_proc_fops, NULL);
+ if (!debug_file) {
+ pr_info("Unable to create /proc/driver/xsvnic/debug\n");
+ ret = -ENOMEM;
+ goto create_proc_end_2;
+ }
+
+ iscsi_boot = proc_create_data("boot-info", S_IFREG, proc_root_xsvnic,
+ &xsvnic_iscsi_boot_proc_fops, NULL);
+ if (!iscsi_boot) {
+ pr_info("Unable to create /proc/driver/xsvnic/boot-info\n");
+ ret = -ENOMEM;
+ goto create_proc_end_3;
+ }
+
+ return 0;
+
+create_proc_end_3:
+ remove_proc_entry("debug", proc_root_xsvnic);
+create_proc_end_2:
+ remove_proc_entry("devices", proc_root_xsvnic_dev);
+create_proc_end_1:
+ remove_proc_entry("driver/xsvnic", NULL);
+ return ret;
+}
+
+void xsvnic_remove_procfs_root_entries(void)
+{
+ remove_proc_entry("debug", proc_root_xsvnic);
+ remove_proc_entry("devices", proc_root_xsvnic);
+ remove_proc_entry("boot-info", proc_root_xsvnic);
+ remove_proc_entry("driver/xsvnic", NULL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSVNIC_XSMP_MSGS_H__
+#define __XSVNIC_XSMP_MSGS_H__
+
+#define XSVNIC_MAX_NAME_SIZE 16
+#define CHASSIS_MAX_NAME_SIZE 32
+#define SESSION_MAX_NAME_SIZE 32
+#define XSVNIC_MAX_HOST_NAME 32
+#define MP_GROUP_NAME_MAX (XSVNIC_MAX_NAME_SIZE + \
+ XSVNIC_MAX_HOST_NAME)
+#define XSVNIC_VNIC_NAMELENTH 15
+
+enum xsvnic_xsmp_cmd_type {
+ XSMP_XSVNIC_INVALID,
+ XSMP_XSVNIC_INSTALL,
+ XSMP_XSVNIC_DELETE,
+ XSMP_XSVNIC_UPDATE,
+ XSMP_XSVNIC_ADMIN_UP,
+ XSMP_XSVNIC_ADMIN_DOWN,
+ XSMP_XSVNIC_OPER_UP,
+ XSMP_XSVNIC_OPER_DOWN,
+ XSMP_XSVNIC_OPER_READY,
+ XSMP_XSVNIC_VLANIP, /* VLAN and IP address */
+ XSMP_XSVNIC_STATS, /* XSVNIC driver statistics */
+ XSMP_XSVNIC_SYNC_BEGIN,
+ XSMP_XSVNIC_SYNC_END,
+ XSMP_XSVNIC_INFO_REQUEST, /* request vnic info */
+ XSMP_XSVNIC_OPER_FAILED,
+ XSMP_XSVNIC_OPER_REQ,
+ XSMP_XSVNIC_HA_INFO,
+ XSMP_XSVNIC_ISCSI_INFO,
+
+ XSMP_XSVNIC_TYPE_MAX,
+};
+
+/* XSVNIC specific messages */
+
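+/*
+ * All multi-byte fields in the XSMP structures below are carried in
+ * network (big-endian) byte order on the wire; the driver converts
+ * them with be*_to_cpu()/cpu_to_be*() at the point of use.
+ */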
+struct xsvnic_xsmp_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+ u64 tca_guid;
+ u16 tca_lid;
+ u16 mac_high;
+ u32 mac_low;
+ u16 vn_admin_rate;
+ u16 admin_state;
+ u16 encap;
+ u16 vn_mtu;
+ u32 install_flag;
+ u8 vnic_name[XSVNIC_MAX_NAME_SIZE];
+ u16 service_level; /* SL value for this vnic */
+ /* 1: enable, 0: disable host rate control */
+ u16 fc_active;
+ u16 cir; /* committed rate in mbps */
+ u16 pir; /* peak rate in mbps */
+ u32 cbs; /* committed burst size in bytes */
+ u32 pbs; /* peak burst size in bytes */
+ /* the index used by vmware for persistence */
+ u8 vm_index;
+ u8 _reserved;
+ u16 mp_flag;
+ u8 mp_group[MP_GROUP_NAME_MAX];
+ } __packed;
+ u8 bytes[512];
+ };
+} __packed;
+
+/* The reason code for NACKing an install */
+/* vnic name exceeding 15 chars */
+#define XSVNIC_NACK_INVALID 0
+/* duplicate name */
+#define XSVNIC_NACK_DUP_NAME 1
+/* duplicate VID */
+#define XSVNIC_NACK_DUP_VID 2
+/* Max number of XSVNICs reached */
+#define XSVNIC_NACK_LIMIT_REACHED 3
+/* Error during instantiation */
+#define XSVNIC_NACK_ALLOCATION_ERROR 4
+#define XSVNIC_NACK_CODE_MAX 5
+
+/* The common XSVNIC XSMP header for all messages */
+struct xsvnic_xsmp_header {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+};
+
+/* Maximum number of dwords in an IP address (v4 or v6) */
+#define MAX_IP_ADDR_DWORDS 4
+
+/* IP address type */
+enum xsvnic_ipaddr_type {
+ ADDR_TYPE_IPV4 = 1,
+ ADDR_TYPE_IPV6,
+};
+
+/* Bitmask values for add/delete VLAN notifications */
+#define XSVNIC_ADD_VLAN_NOTIFY (1 << 0)
+#define XSVNIC_DELETE_VLAN_NOTIFY (1 << 1)
+
+/* Denotes an instance of a VLANID and IP address pair */
+struct xsvnic_xsmp_vlanip_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+ u8 ip_type;
+ u8 _reserved1;
+ u16 _reserved2;
+ u32 vlanid;
+ u32 ipaddress[MAX_IP_ADDR_DWORDS];
+ u32 netmask[MAX_IP_ADDR_DWORDS];
+ /*
+ * This does not come from chassis but locally generated
+ */
+ char ifname[XSVNIC_MAX_NAME_SIZE];
+ u16 mp_flag;
+ } __packed;
+ u8 bytes[512];
+ };
+};
+
+struct xsvnic_ha_info_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 reserved;
+ u64 resource_id; /* vid */
+ u8 ha_state;
+ u8 name[XSVNIC_MAX_NAME_SIZE];
+ } __packed;
+ u8 bytes[512];
+ };
+} __packed;
+
+#define ISCSI_MOUNT_DEV_NAME_LEN 100
+#define MAX_DOMAIN_NAME_LEN 64
+
+#define SAN_MOUNT_TYPE_STATIC 1
+#define SAN_MOUNT_TYPE_LVM 2
+#define SAN_MOUNT_TYPE_DIRECT 3
+
+struct xsvnic_iscsi_info {
+ uint64_t vid;
+ uint8_t role;
+ uint16_t vlan_id;
+ uint8_t ip_type;
+ uint32_t ip_addr;
+ uint32_t netmask;
+ uint64_t mac;
+ char vnic_name[XSVNIC_MAX_NAME_SIZE];
+ uint32_t gateway_ip_address;
+ uint32_t dns_ip_address;
+ char domain_name[MAX_DOMAIN_NAME_LEN];
+ uint16_t protocol;
+ uint16_t port;
+ uint16_t lun;
+ uint32_t target_ip_address;
+ char target_iqn[ISCSI_MOUNT_DEV_NAME_LEN]; /* Target Name */
+ char target_portal_group[ISCSI_MOUNT_DEV_NAME_LEN];
+ char initiator_iqn[ISCSI_MOUNT_DEV_NAME_LEN];
+
+ uint16_t mount_type;
+ char mount_dev[ISCSI_MOUNT_DEV_NAME_LEN];
+ char mount_options[ISCSI_MOUNT_DEV_NAME_LEN];
+ char vol_group[ISCSI_MOUNT_DEV_NAME_LEN];
+ char vol_group_name[ISCSI_MOUNT_DEV_NAME_LEN];
+} __packed;
+
+struct xsvnic_iscsi_msg {
+ union {
+ struct {
+ uint8_t type;
+ uint8_t code;
+ uint16_t length;
+ struct xsvnic_iscsi_info iscsi_info;
+ } __packed;
+ uint8_t bytes[960];
+ };
+} __packed;
+
+/* Values for the bitmask of the install/delete/update message*/
+#define XSVNIC_UPDATE_MAC (1 << 0)
+#define XSVNIC_UPDATE_BANDWIDTH (1 << 1)
+#define XSVNIC_UPDATE_MTU (1 << 2)
+#define XSVNIC_UPDATE_TCA_INFO (1 << 3)
+#define XSVNIC_UPDATE_SL (1 << 4)
+#define XSVNIC_UPDATE_ENCAP (1 << 5)
+#define XSVNIC_UPDATE_ADMIN_STATE (1 << 6)
+#define XSVNIC_UPDATE_QOS (1 << 7)
+#define XSVNIC_UPDATE_ACL (1 << 8)
+#define XSVNIC_UPDATE_MP_FLAG (1 << 10)
+#define XSVNIC_XT_STATE_DOWN (1 << 30)
+#define XSVNIC_UPDATE_XT_CHANGE (1 << 31)
+
+/* mp_flag */
+#define MP_XSVNIC_PRIMARY (1 << 0)
+#define MP_XSVNIC_SECONDARY (1 << 1)
+#define MP_XSVNIC_AUTO_SWITCH (1 << 2)
+
+/* ha_state */
+#define XSVNIC_HA_STATE_UNKNOWN 0
+#define XSVNIC_HA_STATE_ACTIVE 1
+#define XSVNIC_HA_STATE_STANDBY 2
+
+/* Ack and Nack sent out in the 'code' field */
+#define XSMP_XSVNIC_ACK (1 << 6)
+#define XSMP_XSVNIC_NACK (1 << 7)
+
+/* Bits for the promiscuous flag field */
+#define XSVNIC_MCAST (1 << 0)
+
+/* Defines for the install flag */
+#define XSVNIC_INSTALL_TCP_OFFL (1 << 0)
+#define XSVNIC_INSTALL_UDP_OFFL (1 << 1)
+#define XSVNIC_INSTALL_TSO (1 << 3)
+#define XSVNIC_INSTALL_RX_BAT (1 << 4)
+#define XSVNIC_8K_IBMTU (1 << 5)
+#define XSVNIC_INSTALL_LINK2QP (1 << 8)
+
+#define XSIGO_IP_FRAGMENT_BIT (1 << 8)
+#define XSIGO_IPV4_BIT (1 << 6)
+#define XSIGO_TCP_CHKSUM_GOOD_BIT (1 << 3)
+#define XSIGO_UDP_CHKSUM_GOOD_BIT (1 << 1)
+
+#endif /* __XSVNIC_XSMP_MSGS_H__ */
--- /dev/null
+config INFINIBAND_XVE
+ tristate "Xsigo Virtual Ethernet"
+ depends on INFINIBAND_XSCORE && NETDEVICES && INET && (IPV6 || IPV6=n)
+ select INET_LRO
+ ---help---
+ Support for the Xsigo Virtual Ethernet (XVE) protocol,
+ allowing private connectivity between servers using
+ fabric-switched traffic.
+
+config INFINIBAND_XVE_CM
+ bool "Xsigo Virtual Ethernet Connected Mode support"
+ depends on INFINIBAND_XVE
+ default n
+ ---help---
+ This option enables support for XVE connected mode.
+
+ WARNING: Enabling connected mode will trigger some packet
+ drops for multicast and UD mode traffic from this interface,
+ unless you limit mtu for these destinations to 2030.
+
+config INFINIBAND_XVE_DEBUG
+ bool "Xsigo Virtual Ethernet debugging" if EMBEDDED
+ depends on INFINIBAND_XVE
+ default n
+ ---help---
+ This option causes debugging code to be compiled into the
+ XVE driver. The output can be turned on via the
+ debug_level and mcast_debug_level module parameters (which
+ can also be set after the driver is loaded through sysfs).
+
+ This option also creates a directory tree under xve/ in
+ debugfs, which contains files that expose debugging
+ information about IB multicast groups used by the XVE
+ driver.
+
+config INFINIBAND_XVE_DEBUG_DATA
+ bool "Xsigo Virtual Ethernet data path debugging"
+ depends on INFINIBAND_XVE_DEBUG
+ ---help---
+ This option compiles debugging code into the data path
+ of the XVE driver. The output can be turned on via the
+ data_debug_level module parameter; however, even with output
+ turned off, this debugging code will have some performance
+ impact.
--- /dev/null
+obj-$(CONFIG_INFINIBAND_XVE) := xve.o
+xve-y := xve_main.o xve_verbs.o xve_multicast.o xve_ib.o xve_tables.o \
+ xve_ethtool.o xve_cm.o xve_stats.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
--- /dev/null
+/*
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HASH_H
+#define HASH_H 1
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
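+/*
+ * hash_bytes() folds an arbitrary byte string into a 32-bit value.
+ * A typical use is bucketing, e.g. hash_bytes(key, len, 0) % nbuckets
+ * (illustrative only; callers pick their own basis and table size).
+ */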
+
+#define HASH_ROT(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+#define HASH_MIX(a, b, c) \
+ do { \
+ a -= c; a ^= HASH_ROT(c, 4); c += b; \
+ b -= a; b ^= HASH_ROT(a, 6); a += c; \
+ c -= b; c ^= HASH_ROT(b, 8); b += a; \
+ a -= c; a ^= HASH_ROT(c, 16); c += b; \
+ b -= a; b ^= HASH_ROT(a, 19); a += c; \
+ c -= b; c ^= HASH_ROT(b, 4); b += a; \
+ } while (0)
+
+#define HASH_FINAL(a, b, c) \
+ do { \
+ c ^= b; c -= HASH_ROT(b, 14); \
+ a ^= c; a -= HASH_ROT(c, 11); \
+ b ^= a; b -= HASH_ROT(a, 25); \
+ c ^= b; c -= HASH_ROT(b, 16); \
+ a ^= c; a -= HASH_ROT(c, 4); \
+ b ^= a; b -= HASH_ROT(a, 14); \
+ c ^= b; c -= HASH_ROT(b, 24); \
+ } while (0)
+
+static inline uint32_t hash_bytes(const void *p_, size_t n, uint32_t basis)
+{
+ const uint8_t *p = p_;
+ uint32_t a, b, c;
+ uint32_t tmp[3];
+
+ a = b = c = 0xdeadbeef + n + basis;
+
+ while (n >= sizeof(tmp)) {
+ memcpy(tmp, p, sizeof(tmp));
+ a += tmp[0];
+ b += tmp[1];
+ c += tmp[2];
+ HASH_MIX(a, b, c);
+ n -= sizeof(tmp);
+ p += sizeof(tmp);
+ }
+
+ if (n) {
+ tmp[0] = tmp[1] = tmp[2] = 0;
+ memcpy(tmp, p, n);
+ a += tmp[0];
+ b += tmp[1];
+ c += tmp[2];
+ HASH_FINAL(a, b, c);
+ }
+
+ return c;
+}
+
+#endif /* hash.h */
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _XVE_H
+#define _XVE_H
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/icmpv6.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <linux/if_vlan.h>
+#include <linux/if_infiniband.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/if_arp.h>
+#include <linux/inet_lro.h>
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+#include <linux/err.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+
+#include <net/neighbour.h>
+#include <net/dst.h>
+
+#include <linux/atomic.h>
+#include <asm/unaligned.h>
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cache.h>
+
+#include "xscore.h"
+#include "hash.h"
+#include "xsmp_common.h"
+#include "xsmp_session.h"
+#include "xve_xsmp_msgs.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define XVE_DRIVER_VERSION "0.31"
+#else
+#define XVE_DRIVER_VERSION "0.31" XSIGO_LOCAL_VERSION
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO NETIF_F_SW_LRO
+#endif
+
+#ifndef bool
+#define bool int
+#define true 1
+#define false 0
+#endif
+
+/* macros for ipv6 support */
+
+/* 86 bytes */
+#define XVE_IPV6_MIN_PACK_LEN 86
+/* as per the protocol */
+#define IPV6_HDR_LEN 40
+/* 128 bits IP address length for ipv6 */
+#define IPV6_ADDR_LEN 16
+/* next header (icmp-ndp) in ipv6 header */
+#define NEXTHDR_ICMP 58
+/* Neighbor solicitation packet type */
+#define ICMP_NDP_TYPE 135
+/* payload length in ipv6 (icmp header + optional header 24 + 8 ) */
+#define PAYLOAD_LEN 32
+/* as per the protocol */
+#define ICMP_CODE 0
+/* length of ICMP-NDP header */
+#define ICMP_NDP_HDR_LEN 24
+/* source link layer address type */
+#define ICMP_OPTION_TYPE 1
+/* option length field, in units of 8 octets (1 == 8 bytes) */
+#define ICMP_OPTION_LEN 1
+/* prefix for destination multicast address */
+#define PREFIX_MULTI_ADDR 0x33
+/* ethernet header length */
+#define ETH_HDR_LEN 14
+
+/* constants */
+enum xve_flush_level {
+ XVE_FLUSH_LIGHT,
+ XVE_FLUSH_NORMAL,
+ XVE_FLUSH_HEAVY
+};
+
+enum {
+ XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN,
+ XVE_UD_RX_SG = 2, /* max buffer needed for 4K mtu */
+ XVE_CM_MTU = 0x10000 - 0x20, /* padding to align header to 16 */
+ XVE_CM_BUF_SIZE = XVE_CM_MTU + VLAN_ETH_HLEN,
+ XVE_CM_HEAD_SIZE = XVE_CM_BUF_SIZE % PAGE_SIZE,
+ XVE_CM_RX_SG = ALIGN(XVE_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
+ XVE_RX_RING_SIZE = 256,
+ XVE_TX_RING_SIZE = 128,
+ XVE_MAX_QUEUE_SIZE = 8192,
+ XVE_MIN_QUEUE_SIZE = 2,
+ XVE_CM_MAX_CONN_QP = 4096,
+ XVE_NUM_WC = 4,
+ XVE_MAX_PATH_REC_QUEUE = 3,
+ XVE_MAX_MCAST_QUEUE = 3,
+ XVE_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
+ XVE_MCAST_FLAG_SENDONLY = 1,
+ XVE_MCAST_FLAG_BUSY = 2, /* joining or already joined */
+ XVE_MCAST_FLAG_ATTACHED = 3,
+ XVE_MAX_LRO_DESCRIPTORS = 8,
+ XVE_LRO_MAX_AGGR = 64,
+ MAX_SEND_CQE = 32,
+ XVE_CM_COPYBREAK = 256,
+};
+
+enum {
+ XVE_FLAG_OPER_UP = 0,
+ XVE_FLAG_INITIALIZED = 1,
+ XVE_FLAG_ADMIN_UP = 2,
+ XVE_PKEY_ASSIGNED = 3,
+ XVE_PKEY_STOP = 4,
+ XVE_IB_DEV_OPEN = 5,
+ XVE_MCAST_RUN = 6,
+ XVE_STOP_REAPER = 7,
+ XVE_FLAG_ADMIN_CM = 9,
+ XVE_FLAG_UMCAST = 10,
+ XVE_FLAG_CSUM = 11,
+ XVE_MCAST_RUN_GC = 12,
+ XVE_FLAG_ADVERT_JOIN = 13,
+ XVE_FLAG_IB_EVENT = 14,
+ XVE_FLAG_DONT_DETACH_MCAST = 15,
+ XVE_MAX_BACKOFF_SECONDS = 16,
+};
+
+enum xve_advert_types {
+ XVE_ADVERT_JOIN = 1,
+ XVE_ADVERT_RESP = 2,
+ XVE_ADVERT_UPD = 3,
+};
+
+enum {
+ XVE_SYNC_END_DEL_COUNTER,
+ XVE_VNIC_INSTALL_COUNTER,
+ XVE_VNIC_DEL_COUNTER,
+ XVE_VNIC_DEL_NOVID_COUNTER,
+ XVE_VNIC_UPDATE_COUNTER,
+ XVE_VNIC_SYNC_BEGIN_COUNTER,
+ XVE_VNIC_SYNC_END_COUNTER,
+ XVE_VNIC_OPER_REQ_COUNTER,
+ XVE_VNIC_UNSUP_XSMP_COUNTER,
+ XVE_ISCSI_INFO_COUNTER,
+ XVE_DEVICE_REMOVAL_COUNTER,
+ XVE_VNIC_STATS_COUNTER,
+ XVE_NUM_PAGES_ALLOCED,
+ XVE_MAX_GLOB_COUNTERS
+};
+enum {
+ XVE_DATA_HBEAT_COUNTER,
+ XVE_HBEAT_ERR_COUNTER,
+ XVE_STATE_MACHINE,
+ XVE_STATE_MACHINE_UP,
+ XVE_STATE_MACHINE_DOWN,
+ XVE_NAPI_POLL_COUNTER,
+ XVE_SHORT_PKT_COUNTER,
+ XVE_TX_COUNTER,
+ XVE_TX_SKB_FREE_COUNTER,
+ XVE_TX_VLAN_COUNTER,
+ XVE_TX_ERROR_COUNTER,
+ XVE_TX_WRB_EXHAUST,
+ XVE_TX_DROP_OPER_DOWN_COUNT,
+ XVE_TX_SKB_ALLOC_ERROR_COUNTER,
+ XVE_TX_RING_FULL_COUNTER,
+ XVE_TX_WAKE_UP_COUNTER,
+ XVE_TX_QUEUE_STOP_COUNTER,
+ XVE_RX_SKB_COUNTER,
+ XVE_RX_SKB_ALLOC_COUNTER,
+ XVE_RX_SMALLSKB_ALLOC_COUNTER,
+ XVE_RX_SKB_FREE_COUNTER,
+ XVE_RX_SKB_OFFLOAD_COUNTER,
+ XVE_RX_SKB_OFFLOAD_FRAG_COUNTER,
+ XVE_RX_SKB_OFFLOAD_NONIPV4_COUNTER,
+ XVE_RX_ERROR_COUNTER,
+ XVE_RX_QUOTA_EXCEEDED_COUNTER,
+ XVE_RX_NOBUF_COUNTER,
+ XVE_NAPI_SCHED_COUNTER,
+ XVE_NAPI_NOTSCHED_COUNTER,
+ XVE_NAPI_RESCHEDULE_COUNTER,
+ XVE_OPEN_COUNTER,
+ XVE_STOP_COUNTER,
+ XVE_GETSTATS_COUNTER,
+ XVE_SET_MCAST_COUNTER,
+ XVE_VLAN_RX_ADD_COUNTER,
+ XVE_VLAN_RX_DEL_COUNTER,
+ XVE_IOCTL_COUNTER,
+ XVE_WDOG_TIMEOUT_COUNTER,
+ XVE_OPER_REQ_COUNTER,
+ XVE_ADMIN_UP_COUNTER,
+ XVE_ADMIN_DOWN_COUNTER,
+ XVE_OPER_UP_STATE_COUNTER,
+ XVE_QP_ERROR_COUNTER,
+ XVE_IB_RECOVERY_COUNTER,
+ XVE_IB_RECOVERED_COUNTER,
+ XVE_IBLINK_DOWN_COUNTER,
+ XVE_IBLINK_UP_COUNTER,
+ XVE_IB_PORT_NOT_ACTIVE,
+ XVE_SENT_OPER_UP_COUNTER,
+ XVE_SENT_OPER_DOWN_COUNTER,
+ XVE_SENT_OPER_STATE_FAILURE_COUNTER,
+ XVE_SENT_OPER_STATE_SUCCESS_COUNTER,
+ XVE_DROP_STANDBY_COUNTER,
+
+ XVE_MAC_LEARN_COUNTER,
+ XVE_MAC_AGED_COUNTER,
+ XVE_MAC_AGED_CHECK,
+ XVE_MAC_AGED_NOMATCHES,
+ XVE_MAC_STILL_INUSE,
+ XVE_MAC_MOVED_COUNTER,
+
+ XVE_MCAST_JOIN_TASK,
+ XVE_MCAST_LEAVE_TASK,
+ XVE_MCAST_CARRIER_TASK,
+
+ XVE_TX_UD_COUNTER,
+ XVE_TX_RC_COUNTER,
+ XVE_TX_MCAST_PKT,
+ XVE_TX_MCAST_ARP_QUERY,
+ XVE_TX_MCAST_NDP_QUERY,
+ XVE_TX_MCAST_ARP_VLAN_QUERY,
+ XVE_TX_MCAST_NDP_VLAN_QUERY,
+ XVE_TX_MCAST_FLOOD_UD,
+ XVE_TX_MCAST_FLOOD_RC,
+ XVE_TX_QUEUE_PKT,
+
+ XVE_PATH_NOT_FOUND,
+ XVE_PATH_NOT_SETUP,
+ XVE_AH_NOT_FOUND,
+
+ XVE_PATHREC_QUERY_COUNTER,
+ XVE_PATHREC_RESP_COUNTER,
+ XVE_PATHREC_RESP_ERR_COUNTER,
+
+ XVE_SM_CHANGE_COUNTER,
+ XVE_CLIENT_REREGISTER_COUNTER,
+ XVE_EVENT_PORT_ERR_COUNTER,
+ XVE_EVENT_PORT_ACTIVE_COUNTER,
+ XVE_EVENT_LID_CHANGE_COUNTER,
+ XVE_EVENT_PKEY_CHANGE_COUNTER,
+ XVE_INVALID_EVENT_COUNTER,
+
+ XVE_MAX_COUNTERS
+};
+
+enum {
+ /* Work queue Counters */
+ XVE_WQ_START_PKEYPOLL,
+ XVE_WQ_FINISH_PKEYPOLL,
+ XVE_WQ_START_AHREAP,
+ XVE_WQ_FINISH_AHREAP,
+ XVE_WQ_START_FWT_AGING,
+ XVE_WQ_FINISH_FWT_AGING,
+ XVE_WQ_START_MCASTJOIN,
+ XVE_WQ_FINISH_MCASTJOIN,
+ XVE_WQ_START_MCASTLEAVE,
+ XVE_WQ_FINISH_MCASTLEAVE,
+ XVE_WQ_START_MCASTON,
+ XVE_WQ_FINISH_MCASTON,
+ XVE_WQ_START_MCASTRESTART,
+ XVE_WQ_FINISH_MCASTRESTART,
+ XVE_WQ_START_FLUSHLIGHT,
+ XVE_WQ_FINISH_FLUSHLIGHT,
+ XVE_WQ_START_FLUSHNORMAL,
+ XVE_WQ_FINISH_FLUSHNORMAL,
+ XVE_WQ_START_FLUSHHEAVY,
+ XVE_WQ_FINISH_FLUSHHEAVY,
+ XVE_WQ_START_CMSTALE,
+ XVE_WQ_FINISH_CMSTALE,
+ XVE_WQ_START_CMTXSTART,
+ XVE_WQ_FINISH_CMTXSTART,
+ XVE_WQ_START_CMTXREAP,
+ XVE_WQ_FINISH_CMTXREAP,
+ XVE_WQ_START_CMRXREAP,
+ XVE_WQ_FINISH_CMRXREAP,
+ XVE_WQ_DONT_SCHEDULE,
+ XVE_WQ_INVALID,
+ XVE_WQ_FAILED,
+
+ XVE_MISC_MAX_COUNTERS
+};
+
+/* SPEED CALCULATION */
+enum {
+ SPEED_SDR = 2500,
+ SPEED_DDR = 5000,
+ SPEED_QDR = 10000,
+ SPEED_FDR10 = 10313,
+ SPEED_FDR = 14063,
+ SPEED_EDR = 25781
+};
+
+/*
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State. The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ * drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ * to be empty or the number of Poll CQ operations has exceeded
+ * CQ capacity size;
+ * - or
+ * post another WR that completes on the same CQ and wait for this
+ * WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the second option and wait for a completion on the
+ * same CQ before destroying QPs attached to our SRQ.
+ */
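+
+/*
+ * In this driver the drain WR is posted by xve_cm_start_rx_drain()
+ * (wr_id XVE_CM_RX_DRAIN_WRID); its completion is recognised in
+ * xve_cm_handle_rx_wc(), which moves the rx_drain_list entries onto
+ * rx_reap_list to be freed by the CM RX reap work.
+ */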
+
+enum xve_cm_state {
+ XVE_CM_RX_LIVE = 1,
+ XVE_CM_RX_ERROR, /* Ignored by stale task */
+ XVE_CM_RX_FLUSH /* Last WQE Reached event observed */
+};
+
+enum {
+ DEBUG_DRV_INFO = 0x00000001,
+ DEBUG_DRV_FUNCTION = 0x00000002,
+ DEBUG_XSMP_INFO = 0x00000004,
+ DEBUG_XSMP_FUNCTION = 0x00000008,
+ DEBUG_IOCTRL_INFO = 0x00000010,
+ DEBUG_IOCTRL_FUNCTION = 0x00000020,
+ DEBUG_TEST_INFO = 0x00000040,
+ DEBUG_DATA_INFO = 0x00000080,
+ DEBUG_MCAST_INFO = 0x00000100,
+ DEBUG_TABLE_INFO = 0x00000200,
+ DEBUG_FLUSH_INFO = 0x00000400,
+ DEBUG_DUMP_PKTS = 0x00000800,
+ DEBUG_SEND_INFO = 0x00001000,
+ DEBUG_CONTINUE_UNLOAD = 0x00002000,
+ DEBUG_MISC_INFO = 0x00004000,
+ DEBUG_IBDEV_INFO = 0x00008000,
+ DEBUG_CM_INFO = 0x00010000
+};
+
+#define XVE_OP_RECV (1ul << 31)
+#define XVE_FWT_HASH_LISTS 256
+#define XVE_MACT_HASH_LISTS 32
+#define XVE_ADVERT_PROTO 0x8915
+
+#define XVE_SYNC_DIRTY 1
+#define XVE_OS_ADMIN_UP 2
+#define XVE_CHASSIS_ADMIN_UP 3
+#define XVE_DELETING 4
+#define XVE_SEND_ADMIN_STATE 5
+#define XVE_PORT_LINK_UP 6
+#define XVE_OPER_REP_SENT 7
+#define XVE_START_RESP_RCVD 8
+#define XVE_OPER_UP 9
+#define XVE_STOP_RX_SENT 10
+#define XVE_XT_DOWN 11
+#define XVE_XT_STATE_CHANGE 12
+#define XVE_SHUTDOWN 13
+#define XVE_MCAST_LIST_SENT 14
+#define XVE_RING_SIZE_CHANGE 15
+#define XVE_RX_NOBUF 16
+#define XVE_INTR_ENABLED 17
+#define XVE_TRIGGER_NAPI_SCHED 18
+#define XVE_IBLINK_DOWN 19
+#define XVE_MCAST_LIST_PENDING 20
+#define XVE_MCAST_LIST_TIMEOUT 21
+#define XVE_CHASSIS_ADMIN_SHADOW_UP 22
+#define XVE_OVER_QUOTA 23
+#define XVE_TSO_CHANGE 24
+#define XVE_RXBATCH_CHANGE 25
+#define MODULE_NAME "XVE"
+#define ALIGN_TO_FF(a) ((a) & 0xff)
+#define XVE_FWT_ENTRY_VALID 1
+#define XVE_FWT_ENTRY_REFRESH 2
+#define XVE_UD_MTU(ib_mtu) ((ib_mtu) - VLAN_ETH_HLEN)
+#define XVE_UD_BUF_SIZE(ib_mtu) ((ib_mtu) + IB_GRH_BYTES + VLAN_ETH_HLEN)
+#define XVE_MIN_PACKET_LEN 60
+
+/* Extern declarations */
+extern int xve_debug_level;
+extern int xve_cm_single_qp;
+extern u32 xve_hash_salt;
+extern int xve_sendq_size;
+extern int xve_recvq_size;
+extern struct ib_sa_client xve_sa_client;
+extern u32 xve_counters[];
+extern struct workqueue_struct *xve_taskqueue;
+extern struct workqueue_struct *xve_workqueue;
+extern int xve_mc_sendonly_timeout;
+
+extern void xve_remove_procfs_root_entries(void);
+extern int xve_create_procfs_root_entries(void);
+
+
+extern struct mutex xve_mutex;
+extern struct list_head xve_dev_list;
+
+/* structs */
+/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
+struct xve_mcast {
+ struct ib_sa_mcmember_rec mcmember;
+ struct ib_sa_multicast *mc;
+ struct xve_ah *ah;
+
+ struct rb_node rb_node;
+ struct list_head list;
+
+ unsigned long created;
+ unsigned long used;
+ unsigned long backoff;
+ unsigned long flags;
+ unsigned char logcount;
+ struct sk_buff_head pkt_queue;
+ struct net_device *netdev;
+};
+
+struct xve_rx_buf {
+ struct sk_buff *skb;
+ u64 mapping[XVE_UD_RX_SG];
+};
+
+struct xve_tx_buf {
+ struct sk_buff *skb;
+ u64 mapping[MAX_SKB_FRAGS + 1];
+};
+
+struct xve_cm_buf {
+ struct sk_buff *skb;
+ u64 mapping[XVE_CM_RX_SG];
+};
+
+struct ib_cm_id;
+
+struct xve_cm_data {
+ __be32 qpn; /* High byte MUST be ignored on receive */
+ __be32 mtu;
+};
+
+/* CM connection establishment direction */
+enum {
+ XVE_CM_ESTD_RX = 1,
+ XVE_CM_ESTD_TX
+};
+
+/* CM Statistics */
+struct xve_cm_stats {
+ unsigned long tx_jiffies;
+ unsigned long rx_jiffies;
+ unsigned long total_rx_bytes;
+ unsigned long total_tx_bytes;
+ u32 tx_rate;
+ u32 rx_rate;
+ u32 tx_bytes;
+ u32 rx_bytes;
+
+};
+
+/* Single QP structure */
+struct xve_cm_ctx {
+ char version[64];
+ struct xve_path *path;
+ struct ib_cm_id *id;
+ struct ib_qp *qp;
+ struct list_head list;
+ struct net_device *netdev;
+ struct xve_cm_buf *tx_ring;
+ struct xve_cm_buf *rx_ring;
+ struct xve_cm_stats stats;
+ union ib_gid dgid;
+ enum xve_cm_state state;
+ unsigned long flags;
+ unsigned long jiffies;
+ u32 mtu;
+ int recv_count;
+ unsigned tx_head;
+ unsigned tx_tail;
+ u8 direction;
+};
+
+struct xve_cm_dev_priv {
+ struct ib_srq *srq;
+ struct xve_cm_buf *srq_ring;
+ struct ib_cm_id *id;
+ struct list_head passive_ids; /* state: LIVE */
+ struct list_head rx_error_list; /* state: ERROR */
+ struct list_head rx_flush_list; /* state: FLUSH, drain not started */
+ struct list_head rx_drain_list; /* state: FLUSH, drain started */
+ struct list_head rx_reap_list; /* state: FLUSH, drain done */
+ struct list_head start_list;
+ struct list_head reap_list;
+ struct ib_wc ibwc[XVE_NUM_WC];
+ struct ib_sge rx_sge[XVE_CM_RX_SG];
+ struct ib_recv_wr rx_wr;
+ int nonsrq_conn_qp;
+ int max_cm_mtu;
+ int num_frags;
+};
+
+struct xve_ethtool_st {
+ u16 coalesce_usecs;
+ u16 max_coalesced_frames;
+};
+
+struct xve_lro {
+ struct net_lro_mgr lro_mgr;
+ struct net_lro_desc lro_desc[XVE_MAX_LRO_DESCRIPTORS];
+};
+
+struct xve_fwt_entry {
+ struct list_head list;
+ struct hlist_node hlist;
+ struct xve_path *path;
+ union ib_gid dgid;
+ char smac_addr[ETH_ALEN];
+ unsigned long state;
+ atomic_t ref_cnt;
+ unsigned long last_refresh;
+ int hash_value;
+ u32 dqpn;
+ u16 vlan;
+};
+
+struct xve_fwt_s {
+ struct hlist_head fwt[XVE_FWT_HASH_LISTS];
+ spinlock_t lock;
+ unsigned num;
+};
+
+/*
+ * Device private locking: the network stack's tx_lock protects members
+ * used in the TX fast path; "lock" protects everything else.  "lock"
+ * nests inside tx_lock (i.e. tx_lock must be acquired first when both
+ * are needed).
+ */
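+/*
+ * Illustrative nesting of the rule above (a sketch only, not a verbatim
+ * excerpt from the driver):
+ *
+ *	netif_tx_lock(dev);                        outer: TX fast path
+ *	spin_lock_irqsave(&priv->lock, flags);     inner: everything else
+ *	...
+ *	spin_unlock_irqrestore(&priv->lock, flags);
+ *	netif_tx_unlock(dev);
+ */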
+struct xve_dev_priv {
+ struct list_head list;
+ spinlock_t lock;
+ struct mutex mutex;
+ atomic_t ref_cnt;
+
+ struct ib_device *ca;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ struct ib_qp *qp;
+ union ib_gid local_gid;
+ union ib_gid bcast_mgid;
+ u16 local_lid;
+ u32 qkey;
+
+ /* Netdev related attributes */
+ struct net_device *netdev;
+ struct net_device_stats stats;
+ struct napi_struct napi;
+ struct xve_ethtool_st ethtool;
+ u8 lro_mode;
+ struct xve_lro lro;
+ unsigned long flags;
+ unsigned long state;
+
+ struct rb_root path_tree;
+ struct list_head path_list;
+ struct xve_mcast *broadcast;
+ struct list_head multicast_list;
+ struct rb_root multicast_tree;
+
+ struct delayed_work sm_work;
+ struct delayed_work stale_task;
+ struct delayed_work mcast_leave_task;
+ struct delayed_work mcast_join_task;
+ int sm_delay;
+ unsigned int send_hbeat_flag;
+ unsigned long jiffies;
+ struct xve_fwt_s xve_fwt;
+ int aging_delay;
+
+ struct xve_cm_dev_priv cm;
+ unsigned int cm_supported;
+
+ struct ib_port_attr port_attr;
+ u8 port;
+ u16 pkey;
+ u16 pkey_index;
+ int port_speed;
+ int hca_caps;
+ unsigned int admin_mtu;
+ unsigned int mcast_mtu;
+ unsigned int max_ib_mtu;
+ char mode[64];
+
+ /* TX and RX Ring attributes */
+ struct xve_rx_buf *rx_ring;
+ struct xve_tx_buf *tx_ring;
+ unsigned tx_head;
+ unsigned tx_tail;
+ unsigned tx_outstanding;
+ struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
+ struct ib_send_wr tx_wr;
+ struct ib_wc send_wc[MAX_SEND_CQE];
+ struct ib_recv_wr rx_wr;
+ struct ib_sge rx_sge[XVE_UD_RX_SG];
+ struct ib_wc ibwc[XVE_NUM_WC];
+ struct ib_cq *recv_cq;
+ struct ib_cq *send_cq;
+ struct list_head dead_ahs;
+ struct ib_event_handler event_handler;
+
+ /* XSMP related attributes */
+ xsmp_cookie_t xsmp_hndl;
+ struct xsmp_session_info xsmp_info;
+ u64 resource_id;
+ u64 mac;
+ u32 net_id;
+ u16 mp_flag;
+ char vnet_mode;
+ char xve_name[XVE_MAX_NAME_SIZE];
+
+ /* Proc related attributes */
+ struct proc_dir_entry *nic_dir;
+ unsigned long work_queue_failed;
+ char proc_name[XVE_MAX_PROC_NAME_SIZE];
+ u32 counters[XVE_MAX_COUNTERS];
+ u32 misc_counters[XVE_MISC_MAX_COUNTERS];
+ int sindex;
+ int jindex;
+ u16 counters_cleared;
+ u8 next_page;
+ int ix;
+};
+
+struct xve_ah {
+ struct net_device *dev;
+ struct ib_ah *ah;
+ struct list_head list;
+ struct kref ref;
+ unsigned last_send;
+};
+
+struct ib_packed_grh {
+ u8 ip_version;
+ u8 traffic_class;
+ u16 flow_label;
+ u16 payload_length;
+ u8 next_header;
+ u8 hop_limit;
+ union ib_gid source_gid;
+ union ib_gid destination_gid;
+};
+
+struct xve_path {
+ struct net_device *dev;
+ struct xve_cm_ctx *cm_ctx_common;
+ struct xve_cm_ctx *cm_ctx_tx;
+ struct xve_cm_ctx *cm_ctx_rx;
+ struct ib_sa_path_rec pathrec;
+ struct xve_ah *ah;
+ int query_id;
+ struct ib_sa_query *query;
+ struct completion done;
+ struct list_head fwt_list;
+ struct rb_node rb_node;
+ struct list_head list;
+ int valid;
+ struct sk_buff_head queue;
+};
+
+struct xve_work {
+ struct work_struct work;
+ struct delayed_work dwork;
+ xsmp_cookie_t xsmp_hndl;
+ struct xve_dev_priv *priv;
+ int len;
+ int status;
+ u8 *msg;
+};
+
+struct icmp6_ndp {
+ unsigned char icmp6_type;
+ unsigned char icmp6_code;
+ unsigned short int icmp6_cksum;
+ unsigned int icmp6_reserved;
+ unsigned char icmp6_daddr[16];
+ unsigned char icmp6_option_type;
+ unsigned char icmp6_option_len;
+ unsigned char icmp6_option_saddr[6];
+};
+
+#define INC_TX_DROP_STATS(priv, dev) \
+ do { \
+ ++dev->stats.tx_dropped; \
+ ++priv->stats.tx_dropped; \
+ } while (0)
+#define INC_TX_ERROR_STATS(priv, dev) \
+ do { \
+ ++priv->stats.tx_errors; \
+ ++dev->stats.tx_errors; \
+ } while (0)
+#define INC_TX_PKT_STATS(priv, dev) \
+ do { \
+ ++priv->stats.tx_packets; \
+ ++dev->stats.tx_packets; \
+ } while (0)
+#define INC_TX_BYTE_STATS(priv, dev, len) \
+ do { \
+ priv->stats.tx_bytes += len; \
+ dev->stats.tx_bytes += len; \
+ } while (0)
+#define INC_RX_DROP_STATS(priv, dev) \
+ do { \
+ ++dev->stats.rx_dropped; \
+ ++priv->stats.rx_dropped; \
+ } while (0)
+#define INC_RX_ERROR_STATS(priv, dev) \
+ do { \
+ ++priv->stats.rx_errors; \
+ ++dev->stats.rx_errors; \
+ } while (0)
+#define INC_RX_PKT_STATS(priv, dev) \
+ do { \
+ ++priv->stats.rx_packets; \
+ ++dev->stats.rx_packets; \
+ } while (0)
+
+#define INC_RX_BYTE_STATS(priv, dev, len) \
+ do { \
+ priv->stats.rx_bytes += len; \
+ dev->stats.rx_bytes += len; \
+ } while (0)
+
+#define SET_FLUSH_BIT(priv, bit) \
+ do { \
+ unsigned long flags; \
+ spin_lock_irqsave(&priv->lock, flags); \
+ set_bit(bit, &priv->state); \
+ spin_unlock_irqrestore(&priv->lock, flags); \
+ } while (0)
+
+#define PRINT(level, x, fmt, arg...) \
+ printk(level "%s: " fmt, MODULE_NAME, ##arg)
+#define XSMP_ERROR(fmt, arg...) \
+ PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+#define xve_printk(level, priv, format, arg...) \
+ printk(level "%s: " format, \
+ ((struct xve_dev_priv *) priv)->netdev->name, \
+ ## arg)
+#define xve_warn(priv, format, arg...) \
+ xve_printk(KERN_WARNING, priv, format , ## arg)
+
+#define XSMP_INFO(fmt, arg...) \
+ do { \
+ if (xve_debug_level & DEBUG_XSMP_INFO) \
+ PRINT(KERN_DEBUG, "XSMP", fmt , ## arg);\
+ } while (0)
+
+#define xve_test(fmt, arg...) \
+ do { \
+ if (xve_debug_level & DEBUG_TEST_INFO) \
+ PRINT(KERN_DEBUG, "DEBUG", fmt , ## arg); \
+ } while (0)
+
+#define xve_dbg_data(priv, format, arg...) \
+ do { \
+ if (xve_debug_level & DEBUG_DATA_INFO) \
+ xve_printk(KERN_DEBUG, priv, format, \
+ ## arg); \
+ } while (0)
+#define xve_dbg_mcast(priv, format, arg...) \
+ do { \
+ if (xve_debug_level & DEBUG_MCAST_INFO) \
+ xve_printk(KERN_ERR, priv, format , ## arg); \
+ } while (0)
+#define xve_debug(level, priv, format, arg...) \
+ do { \
+ if (xve_debug_level & level) { \
+ if (priv) \
+ printk("%s: " format, \
+ ((struct xve_dev_priv *) priv)->netdev->name, \
+ ## arg); \
+ else \
+ printk("XVE: " format, ## arg); \
+ } \
+ } while (0)
+
+static inline void update_cm_rx_rate(struct xve_cm_ctx *rx_qp, ulong bytes)
+{
+ rx_qp->stats.total_rx_bytes += bytes;
+ rx_qp->stats.rx_bytes += bytes;
+
+ /* update the rate once every two seconds */
+ if ((jiffies - rx_qp->stats.rx_jiffies) > 2 * (HZ)) {
+ u32 r;
+
+ r = rx_qp->stats.rx_bytes /
+ ((jiffies - rx_qp->stats.rx_jiffies) / (HZ));
+ r = (r / 1000000); /* MB/Sec */
+ /* Mega Bits/Sec */
+ rx_qp->stats.rx_rate = (r * 8);
+ rx_qp->stats.rx_jiffies = jiffies;
+ rx_qp->stats.rx_bytes = 0;
+ }
+}
+
+static inline void update_cm_tx_rate(struct xve_cm_ctx *tx_qp, ulong bytes)
+{
+ tx_qp->stats.total_tx_bytes += bytes;
+ tx_qp->stats.tx_bytes += bytes;
+
+ /* update the rate once every two seconds */
+ if ((jiffies - tx_qp->stats.tx_jiffies) > 2 * (HZ)) {
+ u32 r;
+
+ r = tx_qp->stats.tx_bytes /
+ ((jiffies - tx_qp->stats.tx_jiffies) / (HZ));
+ r = (r / 1000000); /* MB/Sec */
+ /* Mega Bits/Sec */
+ tx_qp->stats.tx_rate = (r * 8);
+ tx_qp->stats.tx_jiffies = jiffies;
+ tx_qp->stats.tx_bytes = 0;
+ }
+}
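+
+/*
+ * Example of the computation above: 500,000,000 bytes observed over a
+ * 2 second window gives 250,000,000 bytes/sec, r = 250 (MB/s), and a
+ * reported rate of 250 * 8 = 2000 Mb/s.
+ */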
+
+static inline int xve_ud_need_sg(unsigned int ib_mtu)
+{
+ return XVE_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
+}
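+
+/*
+ * For example, with a 4096-byte IB MTU, XVE_UD_BUF_SIZE(4096) = 4096 +
+ * IB_GRH_BYTES (40) + VLAN_ETH_HLEN (18) = 4154, which exceeds PAGE_SIZE
+ * on 4 KiB-page systems, so UD receives fall back to the two-element
+ * scatter list sized by XVE_UD_RX_SG.
+ */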
+
+static inline struct page *xve_alloc_page(gfp_t alloc_flags)
+{
+ xve_counters[XVE_NUM_PAGES_ALLOCED]++;
+ return alloc_page(alloc_flags);
+}
+
+static inline void xve_send_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+ struct net_device *netdev = priv->netdev;
+
+ if (netdev->features & NETIF_F_LRO)
+ lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+ else
+ netif_receive_skb(skb);
+
+ netdev->last_rx = jiffies;
+ INC_RX_BYTE_STATS(priv, netdev, skb->len);
+ INC_RX_PKT_STATS(priv, netdev);
+}
+
+static inline struct sk_buff *xve_dev_alloc_skb(struct xve_dev_priv *priv,
+ unsigned int size)
+{
+
+ struct sk_buff *skb = dev_alloc_skb(size);
+
+ if (skb)
+ priv->counters[XVE_RX_SKB_ALLOC_COUNTER]++;
+ return skb;
+}
+
+static inline void xve_dev_kfree_skb_any(struct xve_dev_priv *priv,
+ struct sk_buff *skb, u8 type)
+{
+
+ if (type)
+ priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
+ else
+ priv->counters[XVE_RX_SKB_FREE_COUNTER]++;
+
+ if (skb)
+ dev_kfree_skb_any(skb);
+
+}
+
+static inline int xve_cm_admin_enabled(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ return priv->cm_supported && test_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int xve_cm_enabled(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ return priv->cm_supported && test_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int xve_cm_up(struct xve_path *path)
+{
+ if (xve_cm_single_qp)
+ return test_bit(XVE_FLAG_OPER_UP, &path->cm_ctx_common->flags);
+ else
+ return test_bit(XVE_FLAG_OPER_UP, &path->cm_ctx_tx->flags);
+}
+
+static inline struct xve_cm_ctx *xve_get_cmctx(struct xve_path *path)
+{
+ return path->cm_ctx_common;
+}
+
+static inline struct xve_cm_ctx *xve_cmtx_get(struct xve_path *path)
+{
+ if (xve_cm_single_qp)
+ return path->cm_ctx_common;
+ else
+ return path->cm_ctx_tx;
+}
+
+static inline struct xve_cm_ctx *xve_cmrx_get(struct xve_path *path)
+{
+ return path->cm_ctx_rx;
+}
+
+static inline void xve_cm_set(struct xve_path *path, struct xve_cm_ctx *tx)
+{
+ if (xve_cm_single_qp)
+ path->cm_ctx_common = tx;
+ else
+ path->cm_ctx_tx = tx;
+}
+
+static inline int xve_cm_has_srq(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ return !!priv->cm.srq;
+}
+
+static inline unsigned int xve_cm_max_mtu(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ return priv->cm.max_cm_mtu;
+}
+
+static inline void xve_put_ctx(struct xve_dev_priv *priv)
+{
+ atomic_dec(&priv->ref_cnt);
+}
+
+/* Adjust length of skb with fragments to match received data */
+static inline void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
+ unsigned int length, struct sk_buff *toskb)
+{
+ int i, num_frags;
+ unsigned int size;
+
+ /* put header into skb */
+ size = min(length, hdr_space);
+ skb->tail += size;
+ skb->len += size;
+ length -= size;
+
+ num_frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < num_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ if (length == 0) {
+ /* don't need this page */
+ skb_fill_page_desc(toskb, i, skb_frag_page(frag),
+ 0, PAGE_SIZE);
+ --skb_shinfo(skb)->nr_frags;
+ } else {
+ size = min_t(unsigned, length, (unsigned)PAGE_SIZE);
+
+ frag->size = size;
+ skb->data_len += size;
+ skb->truesize += size;
+ skb->len += size;
+ length -= size;
+ }
+ }
+}
+
+/* functions */
+int xve_poll(struct napi_struct *napi, int budget);
+void xve_ib_completion(struct ib_cq *cq, void *dev_ptr);
+void xve_data_recv_handler(struct xve_dev_priv *priv);
+void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
+struct xve_ah *xve_create_ah(struct net_device *dev,
+ struct ib_pd *pd, struct ib_ah_attr *attr);
+void xve_free_ah(struct kref *kref);
+static inline void xve_put_ah(struct xve_ah *ah)
+{
+ kref_put(&ah->ref, xve_free_ah);
+}
+
+int xve_open(struct net_device *dev);
+int xve_add_pkey_attr(struct net_device *dev);
+
+void xve_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_ah *address, u32 qpn);
+int poll_tx(struct xve_dev_priv *priv);
+int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state);
+void handle_carrier_state(struct xve_dev_priv *priv, char state);
+void queue_sm_work(struct xve_dev_priv *priv, int msecs);
+void queue_age_work(struct xve_dev_priv *priv, int msecs);
+
+void xve_mark_paths_invalid(struct net_device *dev);
+void xve_flush_paths(struct net_device *dev);
+void xve_flush_single_path(struct net_device *dev, struct xve_path *path);
+void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid);
+struct xve_dev_priv *xve_intf_alloc(const char *format);
+
+int xve_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+void xve_ib_dev_cleanup(struct net_device *dev);
+void xve_ib_dev_flush_light(struct work_struct *work);
+void xve_ib_dev_flush_normal(struct work_struct *work);
+void xve_ib_dev_flush_heavy(struct work_struct *work);
+void xve_pkey_event(struct work_struct *work);
+void xve_reap_ah(struct work_struct *work);
+void xve_cm_stale_task(struct work_struct *work);
+void xve_mcast_join_task(struct work_struct *work);
+void xve_mcast_leave_task(struct work_struct *work);
+void xve_mcast_restart_task(struct work_struct *work);
+void xve_cm_tx_start(struct work_struct *work);
+void xve_cm_tx_reap(struct work_struct *work);
+void xve_cm_rx_reap(struct work_struct *work);
+void xve_state_machine_work(struct work_struct *work);
+void xve_pkey_poll(struct work_struct *work);
+void xve_start_aging_work(struct work_struct *work);
+void xve_mcast_carrier_on_task(struct work_struct *work);
+
+int xve_ib_dev_open(struct net_device *dev);
+int xve_ib_dev_up(struct net_device *dev);
+int xve_ib_dev_down(struct net_device *dev, int flush);
+int xve_ib_dev_stop(struct net_device *dev, int flush);
+
+int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+void xve_dev_cleanup(struct net_device *dev);
+void xve_fwt_entry_destroy(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry);
+void xve_remove_fwt_entry(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry);
+void xve_fwt_entry_free(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry);
+
+void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+void xve_advert_mcast_join(struct xve_dev_priv *priv);
+int xve_mcast_start_thread(struct net_device *dev);
+int xve_mcast_stop_thread(struct net_device *dev, int flush);
+
+void xve_mcast_dev_down(struct net_device *dev);
+void xve_mcast_dev_flush(struct net_device *dev);
+int xve_mcast_attach(struct net_device *dev, u16 mlid,
+ union ib_gid *mgid, int set_qkey);
+
+int xve_init_qp(struct net_device *dev);
+int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca);
+void xve_transport_dev_cleanup(struct net_device *dev);
+
+void xve_event(struct ib_event_handler *handler, struct ib_event *record);
+
+int xve_pkey_dev_delay_open(struct net_device *dev);
+void xve_drain_cq(struct net_device *dev);
+
+void xve_set_ethtool_ops(struct net_device *dev);
+int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca);
+int xve_modify_mtu(struct net_device *netdev, int new_mtu);
+
+struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
+ struct sk_buff *skb);
+struct sk_buff *xve_create_arp(struct xve_dev_priv *priv,
+ struct sk_buff *org_skb);
+struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv,
+ struct sk_buff *org_skb);
+int xve_send_hbeat(struct xve_dev_priv *xvep);
+void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id);
+
+/* CM */
+void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_cm_ctx *tx);
+int xve_cm_dev_open(struct net_device *dev);
+void xve_cm_dev_stop(struct net_device *dev);
+int xve_cm_dev_init(struct net_device *dev);
+void xve_cm_dev_cleanup(struct net_device *dev);
+struct xve_cm_ctx *xve_cm_create_tx(struct net_device *dev,
+ struct xve_path *path);
+void xve_cm_destroy_tx_deferred(struct xve_cm_ctx *tx);
+void xve_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
+ unsigned int mtu);
+void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
+
+int xve_tables_init(void);
+void xve_fwt_init(struct xve_fwt_s *xve_fwt);
+void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
+ union ib_gid *gid, u32 qpn, char *smac, u16 vlan);
+void xve_fwt_cleanup(struct xve_dev_priv *xvep);
+int xve_advert_process(struct xve_dev_priv *priv, struct sk_buff *skb);
+struct xve_fwt_entry *xve_fwt_lookup(struct xve_fwt_s *xve_fwt, char *mac,
+ u16 vlan, int refresh);
+void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt,
+ struct xve_fwt_entry *fwt_entry);
+struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val);
+bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
+ struct xve_fwt_entry *fwt_entry);
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
+ int do_lock);
+int xve_aging_task_machine(struct xve_dev_priv *priv);
+void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb);
+void xve_tables_exit(void);
+void xve_remove_one(struct xve_dev_priv *priv);
+struct xve_path *__path_find(struct net_device *netdev, void *gid);
+extern int xve_add_proc_entry(struct xve_dev_priv *vp);
+void xve_remove_proc_entry(struct xve_dev_priv *vp);
+extern int xve_change_rxbatch(struct xve_dev_priv *xvep, int flag);
+
+static inline int xve_continue_unload(void)
+{
+ return !(xve_debug_level & DEBUG_CONTINUE_UNLOAD);
+}
+
+static inline int xve_get_misc_info(void)
+{
+ return xve_debug_level & DEBUG_MISC_INFO;
+}
+
+static inline int xg_vlan_tx_tag_present(struct sk_buff *skb)
+{
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+ return veth->h_vlan_proto == htons(ETH_P_8021Q);
+}
+
+static inline u16 xg_vlan_get_rxtag(struct sk_buff *skb)
+{
+ struct ethhdr *eh = (struct ethhdr *)(skb->data);
+ u16 vlan_tci = 0xFFFF;
+
+ if (eh->h_proto == htons(ETH_P_8021Q)) {
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+ vlan_tci = be16_to_cpu(veth->h_vlan_TCI);
+ } else {
+ vlan_tci = 0;
+ }
+
+ return vlan_tci;
+
+}
+
+/*
+ * xve_calc_speed - calculate port speed
+ *
+ * @priv: device private data
+ *
+ * RETURNS: actual port speed in Mb/s (per-lane speed * port width)
+ */
+static inline unsigned int xve_calc_speed(struct xve_dev_priv *priv)
+{
+ struct ib_port_attr *attr;
+ unsigned int link_speed;
+ int port_width;
+
+ if (!priv)
+ return 0;
+
+ attr = &priv->port_attr;
+
+ switch (attr->active_speed) {
+ case 0x1:
+ link_speed = SPEED_SDR;
+ break;
+ case 0x2:
+ link_speed = SPEED_DDR;
+ break;
+ case 0x4:
+ link_speed = SPEED_QDR;
+ break;
+ case 0x8:
+ link_speed = SPEED_FDR10;
+ break;
+ case 0x10:
+ link_speed = SPEED_FDR;
+ break;
+ case 0x20:
+ link_speed = SPEED_EDR;
+ break;
+ default:
+ link_speed = 0;
+ }
+
+ port_width = ib_width_enum_to_int(attr->active_width);
+ if (port_width < 0)
+ port_width = 0;
+
+ return link_speed * port_width;
+}
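+
+/*
+ * For example, a 4x QDR port (active_speed 0x4, active_width 4x) yields
+ * 10000 * 4 = 40000 Mb/s from the helper above.
+ */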
+
+/* Work queue functions */
+static inline void xve_queue_work(struct xve_dev_priv *priv, int work_type)
+{
+ struct xve_work *work;
+
+ if (test_bit(XVE_DELETING, &priv->flags)) {
+ priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+ return;
+ }
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+ work->priv = priv;
+
+ switch (work_type) {
+ case XVE_WQ_START_CMTXSTART:
+ INIT_WORK(&work->work, xve_cm_tx_start);
+ break;
+ case XVE_WQ_START_CMTXREAP:
+ INIT_WORK(&work->work, xve_cm_tx_reap);
+ break;
+ case XVE_WQ_START_CMRXREAP:
+ INIT_WORK(&work->work, xve_cm_rx_reap);
+ break;
+ case XVE_WQ_START_MCASTON:
+ INIT_WORK(&work->work, xve_mcast_carrier_on_task);
+ break;
+ case XVE_WQ_START_MCASTRESTART:
+ INIT_WORK(&work->work, xve_mcast_restart_task);
+ break;
+ case XVE_WQ_START_FLUSHLIGHT:
+ INIT_WORK(&work->work, xve_ib_dev_flush_light);
+ break;
+ case XVE_WQ_START_FLUSHNORMAL:
+ INIT_WORK(&work->work, xve_ib_dev_flush_normal);
+ break;
+ case XVE_WQ_START_FLUSHHEAVY:
+ INIT_WORK(&work->work, xve_ib_dev_flush_heavy);
+ break;
+ default:
+ priv->misc_counters[XVE_WQ_INVALID]++;
+ kfree(work);
+ work = NULL;
+ break;
+ }
+
+ if (!work)
+ return;
+
+ if (queue_work(xve_taskqueue, &work->work) != 0) {
+ atomic_inc(&priv->ref_cnt);
+ priv->misc_counters[work_type]++;
+ } else {
+ priv->misc_counters[XVE_WQ_FAILED]++;
+ priv->work_queue_failed = work_type;
+ }
+
+}
+
+static inline void xve_queue_dwork(struct xve_dev_priv *priv, int work_type,
+ u64 time)
+{
+
+ struct xve_work *work;
+
+ if (test_bit(XVE_DELETING, &priv->flags)) {
+ priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+ return;
+ }
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (!work)
+ return;
+ work->priv = priv;
+
+ switch (work_type) {
+ case XVE_WQ_START_PKEYPOLL:
+ INIT_DELAYED_WORK(&work->dwork, xve_pkey_poll);
+ break;
+ case XVE_WQ_START_AHREAP:
+ INIT_DELAYED_WORK(&work->dwork, xve_reap_ah);
+ break;
+ case XVE_WQ_START_FWT_AGING:
+ INIT_DELAYED_WORK(&work->dwork, xve_start_aging_work);
+ break;
+
+ default:
+ priv->misc_counters[XVE_WQ_INVALID]++;
+ kfree(work);
+ work = NULL;
+ break;
+ }
+
+ if (!work)
+ return;
+
+ if (queue_delayed_work(xve_taskqueue, &work->dwork, time) != 0) {
+ atomic_inc(&priv->ref_cnt);
+ priv->misc_counters[work_type]++;
+ } else {
+ priv->misc_counters[XVE_WQ_FAILED]++;
+ priv->work_queue_failed = work_type;
+ }
+
+}
+
+static inline void xve_queue_complete_work(struct xve_dev_priv *priv,
+ int work_type, u64 time)
+{
+ if (test_bit(XVE_DELETING, &priv->flags)) {
+ priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+ return;
+ }
+
+ switch (work_type) {
+ case XVE_WQ_START_CMSTALE:
+ queue_delayed_work(xve_taskqueue, &priv->stale_task, time);
+ break;
+ case XVE_WQ_START_MCASTJOIN:
+ queue_delayed_work(xve_taskqueue, &priv->mcast_join_task, time);
+ break;
+ case XVE_WQ_START_MCASTLEAVE:
+ queue_delayed_work(xve_taskqueue, &priv->mcast_leave_task,
+ time);
+ break;
+ default:
+ priv->misc_counters[XVE_WQ_INVALID]++;
+ break;
+ }
+
+ priv->misc_counters[work_type]++;
+
+}
+
+static inline struct xve_dev_priv *xve_get_wqctx(struct work_struct *work,
+ int work_type, u8 code)
+{
+ struct xve_work *xwork;
+ struct xve_dev_priv *priv;
+
+/*
+ * code 2: the delayed work is embedded in struct xve_dev_priv, code 1:
+ * a delayed work wrapped in a struct xve_work, code 0: a regular work
+ * wrapped in a struct xve_work.
+ */
+ if (code == 2) {
+ switch (work_type) {
+ case XVE_WQ_FINISH_CMSTALE:
+ priv =
+ container_of(work, struct xve_dev_priv,
+ stale_task.work);
+ break;
+ case XVE_WQ_FINISH_MCASTJOIN:
+ priv =
+ container_of(work, struct xve_dev_priv,
+ mcast_join_task.work);
+ break;
+ case XVE_WQ_FINISH_MCASTLEAVE:
+ priv =
+ container_of(work, struct xve_dev_priv,
+ mcast_leave_task.work);
+ break;
+ default:
+ return NULL;
+ }
+ } else {
+ if (code == 1)
+ xwork = container_of(work, struct xve_work, dwork.work);
+ else
+ xwork = container_of(work, struct xve_work, work);
+ priv = xwork->priv;
+ kfree(xwork);
+ }
+ priv->misc_counters[work_type]++;
+ return priv;
+}
+
+/* DEBUG FUNCTIONS */
+static inline void dbg_dump_raw_pkt(unsigned char *buff, int length, char *name)
+{
+ int i;
+ int tmp_len;
+ u32 *data_ptr;
+ unsigned char *tmp_data_ptr;
+
+ if (!(xve_debug_level & DEBUG_TEST_INFO))
+ return;
+
+ pr_info("%s. Packet length is %d\n", name, length);
+ tmp_len = (length >> 2) + 1;
+ data_ptr = (u32 *) buff;
+ for (i = 0; i < tmp_len; i++) {
+ tmp_data_ptr = (unsigned char *)&data_ptr[i];
+ pr_info("%02x %02x %02x %02x\n",
+ tmp_data_ptr[0], tmp_data_ptr[1],
+ tmp_data_ptr[2], tmp_data_ptr[3]);
+ }
+}
+
+static inline void dbg_dump_skb(struct sk_buff *skb)
+{
+ char prefix[32];
+
+ if (!(xve_debug_level & DEBUG_TEST_INFO))
+ return;
+ snprintf(prefix, 32, "%s:skb-%p", skb->dev ? skb->dev->name : "NULL ",
+ skb);
+
+ pr_info("[%s] --- skb dump ---\n", prefix);
+ pr_info("[%s] len : %d\n", prefix, skb->len);
+ pr_info("[%s] truesize: %d\n", prefix, skb->truesize);
+ pr_info("[%s] data_len: %d\n", prefix, skb->data_len);
+ pr_info("[%s] nr_frags: %d\n", prefix, skb_shinfo(skb)->nr_frags);
+ pr_info("[%s] data : %p\n", prefix, (void *)skb->data);
+ pr_info("[%s] head : %p\n", prefix, (void *)skb->head);
+ pr_info("\n");
+
+}
+
+static inline void dumppkt(unsigned char *pkt, unsigned short len, char *name)
+{
+ int i = 0;
+ unsigned char *p = (unsigned char *)pkt;
+ char line[64] = { 0 };
+ char *cp = line;
+ char filter[] = "0123456789abcdef";
+ int printed_line = 0;
+
+ if (!(xve_debug_level & DEBUG_DUMP_PKTS))
+ return;
+
+ pr_info("%s DumpPacket of %d\n", name, len);
+
+ for (i = 0; i < len; i++) {
+ if ((i != 0) && (i % 8 == 0)) {
+ pr_info("%s\n", line);
+ memset(line, 0, sizeof(line));
+ cp = line;
+ printed_line = 1;
+ } else {
+ printed_line = 0;
+ }
+
+ if (*p > 0x0f)
+ *cp++ = filter[*p >> 4];
+ else
+ *cp++ = filter[0];
+
+ *cp++ = filter[*p++ & 0xf];
+ *cp++ = ':';
+ if (((len - i) == 1) && !printed_line) {
+ pr_info("%s\n", line);
+ memset(line, 0, sizeof(line));
+ cp = line;
+ }
+ }
+ *--cp = 0;
+}
+
+static inline void print_mgid(char *bcast_mgid_token, int debug)
+{
+ if (!debug && !(xve_debug_level & DEBUG_TEST_INFO))
+ return;
+ pr_info("MGID %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x\n",
+ bcast_mgid_token[0] & 0xff, bcast_mgid_token[1] & 0xff,
+ bcast_mgid_token[2] & 0xff, bcast_mgid_token[3] & 0xff,
+ bcast_mgid_token[4] & 0xff, bcast_mgid_token[5] & 0xff,
+ bcast_mgid_token[6] & 0xff, bcast_mgid_token[7] & 0xff,
+ bcast_mgid_token[8] & 0xff, bcast_mgid_token[9] & 0xff,
+ bcast_mgid_token[10] & 0xff, bcast_mgid_token[11] & 0xff,
+ bcast_mgid_token[12] & 0xff, bcast_mgid_token[13] & 0xff,
+ bcast_mgid_token[14] & 0xff, bcast_mgid_token[15] & 0xff);
+}
+
+static inline void print_mgid_buf(char buffer[], char *bcast_mgid_token)
+{
+ sprintf(buffer, "%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x ",
+ bcast_mgid_token[0] & 0xff, bcast_mgid_token[1] & 0xff,
+ bcast_mgid_token[2] & 0xff, bcast_mgid_token[3] & 0xff,
+ bcast_mgid_token[4] & 0xff, bcast_mgid_token[5] & 0xff,
+ bcast_mgid_token[6] & 0xff, bcast_mgid_token[7] & 0xff,
+ bcast_mgid_token[8] & 0xff, bcast_mgid_token[9] & 0xff,
+ bcast_mgid_token[10] & 0xff, bcast_mgid_token[11] & 0xff,
+ bcast_mgid_token[12] & 0xff, bcast_mgid_token[13] & 0xff,
+ bcast_mgid_token[14] & 0xff, bcast_mgid_token[15] & 0xff);
+}
+
+#endif /* _XVE_H */
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int retry_count = 1;
+module_param_named(retry_count, retry_count, int, 0644);
+MODULE_PARM_DESC(retry_count, "Max number IB retries");
+
+static int rnr_retry_count = 4;
+module_param_named(rnr_retry_count, rnr_retry_count, int, 0644);
+MODULE_PARM_DESC(rnr_retry_count, "Max number rnr retries");
+
+#define XVE_CM_IETF_ID 0x1000000000000000ULL
+
+#define XVE_CM_RX_UPDATE_TIME (256 * HZ)
+#define XVE_CM_RX_TIMEOUT (2 * 256 * HZ)
+#define XVE_CM_RX_DELAY (3 * 256 * HZ)
+#define XVE_CM_RX_UPDATE_MASK (0x3)
+
+static struct ib_qp_attr xve_cm_err_attr = {
+ .qp_state = IB_QPS_ERR
+};
+
+#define XVE_CM_RX_DRAIN_WRID 0xffffffff
+
+static struct ib_send_wr xve_cm_rx_drain_wr = {
+ .wr_id = XVE_CM_RX_DRAIN_WRID,
+ .opcode = IB_WR_SEND,
+};
+
+static int xve_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void __xve_cm_tx_reap(struct xve_dev_priv *priv);
+
+static void xve_cm_dma_unmap_rx(struct xve_dev_priv *priv, int frags,
+ u64 mapping[XVE_CM_RX_SG])
+{
+ int i;
+
+ ib_dma_unmap_single(priv->ca, mapping[0], XVE_CM_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < frags; ++i) {
+ xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+ ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ }
+}
+
+static int xve_cm_post_receive_srq(struct net_device *netdev, int id)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ struct ib_recv_wr *bad_wr;
+ int i, ret;
+
+ priv->cm.rx_wr.wr_id = id | XVE_OP_CM | XVE_OP_RECV;
+
+ for (i = 0; i < priv->cm.num_frags; ++i)
+ priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+ ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ xve_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+ xve_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
+ priv->cm.srq_ring[id].mapping);
+ dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+ priv->cm.srq_ring[id].skb = NULL;
+ }
+
+ return ret;
+}
+
+static struct sk_buff *xve_cm_alloc_rx_skb(struct net_device *dev,
+ struct xve_cm_buf *rx_ring,
+ int id, int frags,
+ u64 mapping[XVE_CM_RX_SG])
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb;
+ int i;
+
+ skb = xve_dev_alloc_skb(priv, XVE_CM_HEAD_SIZE + NET_IP_ALIGN);
+ if (unlikely(!skb)) {
+ xve_warn(priv, "%s Failed to allocate skb\n", __func__);
+ return NULL;
+ }
+
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ mapping[0] = ib_dma_map_single(priv->ca, skb->data, XVE_CM_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+ xve_warn(priv, "%s Failed to Map skb\n", __func__);
+ dev_kfree_skb_any(skb);
+ return NULL;
+ }
+
+ for (i = 0; i < frags; i++) {
+ gfp_t alloc_flags = GFP_ATOMIC;
+ struct page *page = xve_alloc_page(alloc_flags);
+
+ if (!page) {
+ xve_warn(priv,
+ "%s Failed to allocate flags %x page state %d\n",
+ __func__, alloc_flags,
+ test_bit(XVE_OPER_UP, &priv->state));
+ goto partial_error;
+ }
+ skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+
+ mapping[i + 1] =
+ ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page.p,
+ 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) {
+ xve_warn(priv, "%s Failed to Map page\n", __func__);
+ goto partial_error;
+ }
+ }
+
+ rx_ring[id].skb = skb;
+ return skb;
+
+partial_error:
+
+ ib_dma_unmap_single(priv->ca, mapping[0], XVE_CM_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (; i > 0; --i) {
+ xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+ ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ }
+
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
+
+static void xve_cm_free_rx_ring(struct net_device *dev,
+ struct xve_cm_buf *rx_ring)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < xve_recvq_size; ++i) {
+ if (rx_ring[i].skb) {
+ xve_cm_dma_unmap_rx(priv, XVE_CM_RX_SG - 1,
+ rx_ring[i].mapping);
+ xve_dev_kfree_skb_any(priv, rx_ring[i].skb, 0);
+ }
+ }
+ vfree(rx_ring);
+}
+
+static void xve_cm_start_rx_drain(struct xve_dev_priv *priv)
+{
+ struct ib_send_wr *bad_wr;
+ struct xve_cm_ctx *p;
+
+ /* We only reserved 1 extra slot in CQ for drain WRs, so
+ * make sure we have at most 1 outstanding WR. */
+ if (list_empty(&priv->cm.rx_flush_list) ||
+ !list_empty(&priv->cm.rx_drain_list))
+ return;
+
+ /*
+ * QPs on flush list are error state. This way, a "flush
+ * error" WC will be immediately generated for each WR we post.
+ */
+ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+ if (ib_post_send(p->qp, &xve_cm_rx_drain_wr, &bad_wr))
+ xve_warn(priv, "failed to post drain wr\n");
+
+ list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
+}
+
+static void xve_cm_rx_event_handler(struct ib_event *event, void *ctx)
+{
+ struct xve_cm_ctx *p = ctx;
+ struct xve_dev_priv *priv = netdev_priv(p->netdev);
+ unsigned long flags;
+
+ if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
+ return;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ list_move(&p->list, &priv->cm.rx_flush_list);
+ p->state = XVE_CM_RX_FLUSH;
+ xve_cm_start_rx_drain(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static struct ib_qp *xve_cm_create_rx_qp(struct net_device *dev,
+ struct xve_cm_ctx *p)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_init_attr attr = {
+ .event_handler = xve_cm_rx_event_handler,
+ .send_cq = priv->recv_cq, /* For drain WR */
+ .recv_cq = priv->recv_cq,
+ .srq = priv->cm.srq,
+ .cap.max_send_wr = 1, /* For drain WR */
+ .cap.max_send_sge = 1, /* 0 Seems not to work */
+ .sq_sig_type = IB_SIGNAL_ALL_WR,
+ .qp_type = IB_QPT_RC,
+ .qp_context = p,
+ };
+
+ return ib_create_qp(priv->pd, &attr);
+}
+
+static int xve_cm_modify_rx_qp(struct net_device *dev,
+ struct ib_cm_id *cm_id, struct ib_qp *qp,
+ unsigned psn)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
+ return ret;
+ }
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to INIT: %d\n", ret);
+ return ret;
+ }
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+ return ret;
+ }
+ qp_attr.rq_psn = psn;
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * Current Mellanox HCA firmware won't generate completions
+ * with error for drain WRs unless the QP has been moved to
+ * RTS first. This work-around leaves a window where a QP has
+ * moved to error asynchronously, but this will eventually get
+ * fixed in firmware, so let's not error out if modify QP
+ * fails.
+ */
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+ return 0;
+ }
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+ return 0;
+ }
+
+ return 0;
+}
+
+static void xve_cm_init_rx_wr(struct net_device *dev,
+ struct ib_recv_wr *wr, struct ib_sge *sge)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < priv->cm.num_frags; ++i)
+ sge[i].lkey = priv->mr->lkey;
+
+ sge[0].length = XVE_CM_HEAD_SIZE;
+ for (i = 1; i < priv->cm.num_frags; ++i)
+ sge[i].length = PAGE_SIZE;
+
+ wr->next = NULL;
+ wr->sg_list = sge;
+ wr->num_sge = priv->cm.num_frags;
+}
+
+static int xve_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+ struct ib_qp *qp, struct ib_cm_req_event_param *req,
+ unsigned psn)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_data data = { };
+ struct ib_cm_rep_param rep = { };
+
+ data.qpn = cpu_to_be32(priv->qp->qp_num);
+ data.mtu = cpu_to_be32(XVE_CM_BUF_SIZE);
+
+ rep.private_data = &data;
+ rep.private_data_len = sizeof(data);
+ rep.flow_control = 0;
+ rep.rnr_retry_count = req->rnr_retry_count;
+ rep.srq = xve_cm_has_srq(dev);
+ rep.qp_num = qp->qp_num;
+ rep.starting_psn = psn;
+ return ib_send_cm_rep(cm_id, &rep);
+}
+
+static int xve_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct net_device *dev = cm_id->context;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_ctx *p;
+ unsigned psn;
+ int ret;
+ union ib_gid *dgid = &event->param.req_rcvd.primary_path->dgid;
+ struct xve_path *path;
+
+ xve_debug(DEBUG_CM_INFO, priv, "%s REQ arrived\n", __func__);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ p->netdev = dev;
+ strncpy(p->version, XSIGO_LOCAL_VERSION, 60);
+ p->direction = XVE_CM_ESTD_RX;
+ p->id = cm_id;
+ cm_id->context = p;
+ p->state = XVE_CM_RX_LIVE;
+ p->jiffies = jiffies;
+ INIT_LIST_HEAD(&p->list);
+ /*
+ * Save the remote GID
+ */
+ memcpy(&p->dgid, dgid, sizeof(union ib_gid));
+
+ p->qp = xve_cm_create_rx_qp(dev, p);
+ if (IS_ERR(p->qp)) {
+ ret = PTR_ERR(p->qp);
+ goto err_qp;
+ }
+
+ psn = xve_random32(priv);
+ ret = xve_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+ if (ret)
+ goto err_modify;
+
+ spin_lock_irq(&priv->lock);
+ /* Find path and insert rx_qp */
+ path = __path_find(dev, dgid->raw);
+ if (path) {
+ char print[512];
+
+ print_mgid_buf(print, (char *)dgid->raw);
+ pr_info("XVE: %s Adding Rx QP to the path %s\n",
+ priv->xve_name, print);
+ path->cm_ctx_rx = p;
+ } else {
+ priv->counters[XVE_PATH_NOT_SETUP]++;
+ }
+
+ xve_queue_complete_work(priv, XVE_WQ_START_CMSTALE, XVE_CM_RX_DELAY);
+ /* Add this entry to passive ids list head, but do not re-add it
+ * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
+ p->jiffies = jiffies;
+ if (p->state == XVE_CM_RX_LIVE)
+ list_move(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irq(&priv->lock);
+
+ ret = xve_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
+ if (ret) {
+ xve_warn(priv, "failed to send REP: %d\n", ret);
+ if (ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE))
+ xve_warn(priv, "unable to move qp to error state\n");
+ }
+ return 0;
+
+err_modify:
+ ib_destroy_qp(p->qp);
+err_qp:
+ kfree(p);
+ return ret;
+}
+
+static int xve_cm_rx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct xve_cm_ctx *p;
+ struct xve_dev_priv *priv;
+
+ switch (event->event) {
+ case IB_CM_REQ_RECEIVED:
+ return xve_cm_req_handler(cm_id, event);
+ case IB_CM_DREQ_RECEIVED:
+ p = cm_id->context;
+ ib_send_cm_drep(cm_id, NULL, 0);
+ /* Fall through */
+ case IB_CM_REJ_RECEIVED:
+ p = cm_id->context;
+ priv = netdev_priv(p->netdev);
+ if (ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE))
+ xve_warn(priv, "unable to move qp to error state\n");
+ /* Fall through */
+ default:
+ return 0;
+ }
+}
+
+static void xve_cm_free_rx_reap_list(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_ctx *rx, *n;
+ LIST_HEAD(list);
+
+ spin_lock_irq(&priv->lock);
+ list_splice_init(&priv->cm.rx_reap_list, &list);
+ spin_unlock_irq(&priv->lock);
+
+ list_for_each_entry_safe(rx, n, &list, list) {
+ ib_destroy_cm_id(rx->id);
+ ib_destroy_qp(rx->qp);
+ kfree(rx);
+ }
+}
+
+void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_buf *rx_ring;
+ unsigned int wr_id = wc->wr_id & ~(XVE_OP_CM | XVE_OP_RECV);
+ struct sk_buff *skb, *newskb = NULL;
+ struct xve_cm_ctx *p;
+ unsigned long flags;
+ u64 mapping[XVE_CM_RX_SG];
+ int frags;
+ struct sk_buff *small_skb;
+ u16 vlan;
+
+ xve_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
+ wr_id, wc->status);
+
+ if (unlikely(wr_id >= xve_recvq_size)) {
+ if (wr_id ==
+ (XVE_CM_RX_DRAIN_WRID & ~(XVE_OP_CM | XVE_OP_RECV))) {
+ spin_lock_irqsave(&priv->lock, flags);
+ list_splice_init(&priv->cm.rx_drain_list,
+ &priv->cm.rx_reap_list);
+ xve_cm_start_rx_drain(priv);
+ xve_queue_work(priv, XVE_WQ_START_CMRXREAP);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ } else
+ xve_warn(priv,
+ "cm recv completion event with wrid %d (> %d)\n",
+ wr_id, xve_recvq_size);
+ return;
+ }
+
+ p = wc->qp->qp_context;
+ if (p == NULL) {
+ pr_err("%s ERROR In CM Connection[RX] context Null [xve %s]",
+ __func__, priv->xve_name);
+ return;
+
+ }
+
+ if (p->direction != XVE_CM_ESTD_RX) {
+ pr_err("%s ERROR CM Connection[RX] is not yet", __func__);
+ pr_err(" established [xve %s]", priv->xve_name);
+ pr_err("p->direction %d\n", p->direction);
+ return;
+
+ }
+
+ rx_ring = priv->cm.srq_ring;
+ skb = rx_ring[wr_id].skb;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (!test_bit(XVE_DELETING, &priv->state)) {
+ pr_err("%s: cm recv error", priv->xve_name);
+ pr_err("(status=%d, wrid=%d", wc->status, wr_id);
+ pr_err("vend_err %x)\n", wc->vendor_err);
+ }
+ INC_RX_DROP_STATS(priv, dev);
+ goto repost;
+ }
+
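+ /*
+ * Only roughly one completion in four (wr_id with the low bits of
+ * XVE_CM_RX_UPDATE_MASK clear) refreshes this connection's slot on
+ * the passive_ids list, presumably to limit lock traffic.
+ */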
+ if (unlikely(!(wr_id & XVE_CM_RX_UPDATE_MASK))) {
+ if (p && time_after_eq(jiffies,
+ p->jiffies + XVE_CM_RX_UPDATE_TIME)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ p->jiffies = jiffies;
+ /* Move this entry to list head, but do not re-add it
+ * if it has been moved out of list. */
+ if (p->state == XVE_CM_RX_LIVE)
+ list_move(&p->list, &priv->cm.passive_ids);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+ }
+
+ if (wc->byte_len < XVE_CM_COPYBREAK) {
+ int dlen = wc->byte_len;
+
+ small_skb = dev_alloc_skb(dlen + NET_IP_ALIGN);
+ if (small_skb) {
+ skb_reserve(small_skb, NET_IP_ALIGN);
+ ib_dma_sync_single_for_cpu(priv->ca,
+ rx_ring[wr_id].mapping[0],
+ dlen, DMA_FROM_DEVICE);
+ skb_copy_from_linear_data(skb, small_skb->data, dlen);
+ ib_dma_sync_single_for_device(priv->ca,
+ rx_ring[wr_id].mapping[0],
+ dlen, DMA_FROM_DEVICE);
+ skb_put(small_skb, dlen);
+ skb = small_skb;
+ priv->counters[XVE_RX_SMALLSKB_ALLOC_COUNTER]++;
+ goto copied;
+ }
+ }
+
+ frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+ (unsigned)XVE_CM_HEAD_SIZE)) /
+ PAGE_SIZE;
+
+ newskb = xve_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
+ if (unlikely(!newskb)) {
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ xve_dbg_data(priv,
+ "%s failed to allocate rc receive buffer %d\n",
+ __func__, wr_id);
+ INC_RX_DROP_STATS(priv, dev);
+ goto repost;
+ }
+
+ xve_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
+ memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
+
+ xve_dbg_data(priv, "%s received %d bytes, SLID 0x%04x\n", __func__,
+ wc->byte_len, wc->slid);
+
+ skb_put_frags(skb, XVE_CM_HEAD_SIZE, wc->byte_len, newskb);
+copied:
+
+ vlan = xg_vlan_get_rxtag(skb);
+ xve_fwt_insert(priv, p, &p->dgid, 0, skb->data + ETH_ALEN, vlan);
+ xve_prepare_skb(priv, skb);
+
+ xve_dbg_data(priv,
+ "%s Received RC packet %02x %02x %02x %02x %02x %02x",
+ __func__, skb->data[0], skb->data[1], skb->data[2],
+ skb->data[3], skb->data[4], skb->data[5]);
+ xve_dbg_data(priv,
+ "%02x %02x %02x %02x %02x %02x proto %x\n",
+ skb->data[6], skb->data[7], skb->data[8], skb->data[9],
+ skb->data[10], skb->data[11],
+ skb->protocol);
+ update_cm_rx_rate(p, skb->len);
+ xve_send_skb(priv, skb);
+repost:
+ if (unlikely(xve_cm_post_receive_srq(dev, wr_id))) {
+ xve_warn(priv, "xve_cm_post_receive_srq failed ");
+ xve_warn(priv, "for buf %d\n", wr_id);
+ }
+}
+
+static inline int post_send(struct xve_dev_priv *priv,
+ struct xve_cm_ctx *tx,
+ unsigned int wr_id, u64 addr, int len)
+{
+ struct ib_send_wr *bad_wr;
+
+ priv->tx_sge[0].addr = addr;
+ priv->tx_sge[0].length = len;
+
+ priv->tx_wr.num_sge = 1;
+ priv->tx_wr.wr_id = wr_id | XVE_OP_CM;
+
+ return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+}
+
+static void xve_cm_tx_buf_free(struct xve_dev_priv *priv,
+ struct xve_cm_buf *tx_req)
+{
+ if ((tx_req->skb == NULL) || (tx_req->mapping[0] == 0))
+ xve_debug(DEBUG_DATA_INFO, priv,
+ "%s Contents of tx_req %p are NULL skb %p mapping %lld\n",
+ __func__, tx_req, tx_req->skb, tx_req->mapping[0]);
+ else
+ ib_dma_unmap_single(priv->ca, tx_req->mapping[0],
+ tx_req->skb->len, DMA_TO_DEVICE);
+
+ xve_dev_kfree_skb_any(priv, tx_req->skb, 1);
+ memset(tx_req, 0, sizeof(struct xve_cm_buf));
+}
+
+void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_cm_ctx *tx)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_buf *tx_req;
+ u64 addr;
+
+ if (unlikely(skb->len > tx->mtu + VLAN_ETH_HLEN)) {
+ xve_warn(priv,
+ "packet len %d (> %d) too long to send, dropping\n",
+ skb->len, tx->mtu + VLAN_ETH_HLEN);
+ INC_TX_DROP_STATS(priv, dev);
+ INC_TX_ERROR_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ xve_dbg_data(priv,
+ "sending packet: head 0x%x length %d connection 0x%x\n",
+ tx->tx_head, skb->len, tx->qp->qp_num);
+
+ /*
+ * We put the skb into the tx_ring _before_ we call post_send()
+ * because it's entirely possible that the completion handler will
+ * run before we execute anything after the post_send(). That
+ * means we have to make sure everything is properly recorded and
+ * our state is consistent before we call post_send().
+ */
+ tx_req = &tx->tx_ring[tx->tx_head & (xve_sendq_size - 1)];
+ tx_req->skb = skb;
+ addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+ INC_TX_ERROR_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ memset(tx_req, 0, sizeof(struct xve_cm_buf));
+ return;
+ }
+ tx_req->mapping[0] = addr;
+
+ if (unlikely(post_send(priv, tx, tx->tx_head & (xve_sendq_size - 1),
+ addr, skb->len))) {
+ xve_warn(priv, "post_send failed\n");
+ INC_TX_ERROR_STATS(priv, dev);
+ xve_cm_tx_buf_free(priv, tx_req);
+ } else {
+ ++tx->tx_head;
+ if (++priv->tx_outstanding == xve_sendq_size) {
+ xve_dbg_data(priv,
+ "TX ring 0x%x full, stopping kernel net queue\n",
+ tx->qp->qp_num);
+ if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+ xve_warn(priv,
+ "request notify on send CQ failed\n");
+ priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+ netif_stop_queue(dev);
+ }
+ }
+ priv->send_hbeat_flag = 0;
+
+}
+
+void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_ctx *tx = wc->qp->qp_context;
+ unsigned int wr_id = wc->wr_id & ~XVE_OP_CM;
+ struct xve_cm_buf *tx_req;
+
+ xve_dbg_data(priv, "cm send completion: id %d, status: %d\n",
+ wr_id, wc->status);
+
+ if (unlikely(wr_id >= xve_sendq_size)) {
+ xve_warn(priv, "cm send completion event with wrid %d (> %d)\n",
+ wr_id, xve_sendq_size);
+ return;
+ }
+
+ tx_req = &tx->tx_ring[wr_id];
+ xve_cm_tx_buf_free(priv, tx_req);
+ ++tx->tx_tail;
+
+ netif_tx_lock(dev);
+ if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+ netif_queue_stopped(dev) &&
+ test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+ priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+ netif_wake_queue(dev);
+ }
+
+ if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_err("%s: failed cm send event ", priv->xve_name);
+ pr_err("(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ xve_cm_destroy_tx_deferred(tx);
+ }
+ netif_tx_unlock(dev);
+}
+
+int xve_cm_dev_open(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret;
+ u64 sid;
+
+ if (!priv->cm_supported)
+ return 0;
+
+ priv->cm.id = ib_create_cm_id(priv->ca, xve_cm_rx_handler, dev);
+ if (IS_ERR(priv->cm.id)) {
+ pr_warn("%s: failed to create CM ID\n", priv->ca->name);
+ ret = PTR_ERR(priv->cm.id);
+ goto err_cm;
+ }
+
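+ /*
+ * Service ID layout used below: XVE_CM_IETF_ID in the topmost bits,
+ * the last two bytes of the local GID in bits 47..32, and the
+ * 32-bit net_id in the low word.
+ */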
+ sid = priv->local_gid.raw[14] << 8 | priv->local_gid.raw[15];
+ sid = XVE_CM_IETF_ID | sid << 32 | priv->net_id;
+
+ ret = ib_cm_listen(priv->cm.id, cpu_to_be64(sid), 0, NULL);
+ if (ret) {
+ pr_warn("%s: failed to listen on ID 0x%llx\n",
+ priv->ca->name, sid);
+ goto err_listen;
+ }
+
+ return 0;
+
+err_listen:
+ ib_destroy_cm_id(priv->cm.id);
+err_cm:
+ priv->cm.id = NULL;
+ return ret;
+}
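+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * the 64-bit listen service ID built above packs the IETF CM prefix,
+ * the low two bytes of the local GID (shifted into bits 32..47) and
+ * the 32-bit net_id in the low word.  For GID bytes 0x12/0x34 and a
+ * net_id of 0x55 the ID is XVE_CM_IETF_ID | 0x1234ULL << 32 | 0x55.
+ */
+static inline u64 xve_example_service_id(u8 gid14, u8 gid15, u32 net_id)
+{
+ u64 sid = (u64)gid14 << 8 | gid15;
+
+ return XVE_CM_IETF_ID | sid << 32 | net_id;
+}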
+
+void xve_cm_dev_stop(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_ctx *p;
+ unsigned long begin;
+ int ret;
+
+ if (!priv->cm_supported || !priv->cm.id)
+ return;
+
+ ib_destroy_cm_id(priv->cm.id);
+ priv->cm.id = NULL;
+
+ spin_lock_irq(&priv->lock);
+ while (!list_empty(&priv->cm.passive_ids)) {
+ p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+ list_move(&p->list, &priv->cm.rx_error_list);
+ p->state = XVE_CM_RX_ERROR;
+ spin_unlock_irq(&priv->lock);
+ ret = ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE);
+ if (ret)
+ xve_warn(priv, "unable to move qp to error state: %d\n",
+ ret);
+ spin_lock_irq(&priv->lock);
+ }
+
+ /* Wait for all RX to be drained */
+ begin = jiffies;
+
+ while (!list_empty(&priv->cm.rx_error_list) ||
+ !list_empty(&priv->cm.rx_flush_list) ||
+ !list_empty(&priv->cm.rx_drain_list)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ xve_warn(priv, "RX drain timing out\n");
+
+ /*
+ * assume the HW is wedged and just free up everything.
+ */
+ list_splice_init(&priv->cm.rx_flush_list,
+ &priv->cm.rx_reap_list);
+ list_splice_init(&priv->cm.rx_error_list,
+ &priv->cm.rx_reap_list);
+ list_splice_init(&priv->cm.rx_drain_list,
+ &priv->cm.rx_reap_list);
+ break;
+ }
+ spin_unlock_irq(&priv->lock);
+ msleep(20);
+ xve_drain_cq(dev);
+ spin_lock_irq(&priv->lock);
+ }
+
+ spin_unlock_irq(&priv->lock);
+
+ cancel_delayed_work_sync(&priv->stale_task);
+ xve_cm_free_rx_reap_list(dev);
+ __xve_cm_tx_reap(priv);
+
+}
+
+static int xve_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct xve_cm_ctx *p = cm_id->context;
+ struct xve_dev_priv *priv = netdev_priv(p->netdev);
+ struct xve_cm_data *data = event->private_data;
+ struct sk_buff_head skqueue;
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+ struct sk_buff *skb;
+
+ p->mtu = be32_to_cpu(data->mtu);
+
+ if (p->mtu <= ETH_HLEN) {
+ xve_warn(priv, "Rejecting connection: mtu %d <= %d\n",
+ p->mtu, ETH_HLEN);
+ return -EINVAL;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+ return ret;
+ }
+
+ qp_attr.rq_psn = 0; /* FIXME */
+ ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+ return ret;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+ return ret;
+ }
+ ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+ return ret;
+ }
+
+ skb_queue_head_init(&skqueue);
+
+ spin_lock_irq(&priv->lock);
+ set_bit(XVE_FLAG_OPER_UP, &p->flags);
+ while ((skb = __skb_dequeue(&p->path->queue)))
+ __skb_queue_tail(&skqueue, skb);
+ spin_unlock_irq(&priv->lock);
+
+ while ((skb = __skb_dequeue(&skqueue))) {
+ skb->dev = p->netdev;
+ if (dev_queue_xmit(skb)) {
+ xve_warn(priv, "dev_queue_xmit failed ");
+ xve_warn(priv, "to requeue packet\n");
+ } else {
+ xve_dbg_data(priv, "%s Successfully sent skb\n",
+ __func__);
+ }
+
+ }
+
+ ret = ib_send_cm_rtu(cm_id, NULL, 0);
+ if (ret) {
+ xve_warn(priv, "failed to send RTU: %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static struct ib_qp *xve_cm_create_tx_qp(struct net_device *dev,
+ struct xve_cm_ctx *tx)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_init_attr attr = {
+ .send_cq = priv->recv_cq,
+ .recv_cq = priv->recv_cq,
+ .srq = priv->cm.srq,
+ .cap.max_send_wr = xve_sendq_size,
+ .cap.max_send_sge = 1,
+ .sq_sig_type = IB_SIGNAL_ALL_WR,
+ .qp_type = IB_QPT_RC,
+ .qp_context = tx
+ };
+
+ return ib_create_qp(priv->pd, &attr);
+}
+
+static int xve_cm_send_req(struct net_device *dev,
+ struct ib_cm_id *id, struct ib_qp *qp,
+ struct ib_sa_path_rec *pathrec)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_data data = { };
+ struct ib_cm_req_param req = { };
+ u64 sid;
+
+ sid = pathrec->dgid.raw[14] << 8 | pathrec->dgid.raw[15];
+ sid = XVE_CM_IETF_ID | sid << 32 | priv->net_id;
+
+ data.qpn = cpu_to_be32(priv->qp->qp_num);
+ data.mtu = cpu_to_be32(XVE_CM_BUF_SIZE);
+
+ req.primary_path = pathrec;
+ req.alternate_path = NULL;
+ req.service_id = cpu_to_be64(sid);
+ req.qp_num = qp->qp_num;
+ req.qp_type = qp->qp_type;
+ req.private_data = &data;
+ req.private_data_len = sizeof(data);
+ req.flow_control = 0;
+
+ req.starting_psn = 0; /* FIXME */
+
+ /*
+ * Pick some arbitrary defaults here; we could make these
+ * module parameters if anyone cared about setting them.
+ */
+ req.responder_resources = 4;
+ req.remote_cm_response_timeout = 20;
+ req.local_cm_response_timeout = 20;
+ req.retry_count = retry_count;
+ req.rnr_retry_count = rnr_retry_count;
+ req.max_cm_retries = 15;
+ req.srq = xve_cm_has_srq(dev);
+ return ib_send_cm_req(id, &req);
+}
+
+static int xve_cm_modify_tx_init(struct net_device *dev,
+ struct ib_cm_id *cm_id, struct ib_qp *qp)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr qp_attr;
+ int qp_attr_mask, ret;
+
+ ret =
+ ib_find_pkey(priv->ca, priv->port, priv->pkey, &qp_attr.pkey_index);
+ if (ret) {
+ xve_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret);
+ return ret;
+ }
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+ qp_attr.port_num = priv->port;
+ qp_attr_mask =
+ IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+ ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
+ return ret;
+ }
+ return 0;
+}
+
+static int xve_cm_tx_init(struct xve_cm_ctx *p, struct ib_sa_path_rec *pathrec)
+{
+ struct xve_dev_priv *priv = netdev_priv(p->netdev);
+ int ret;
+
+ p->tx_ring = vzalloc(xve_sendq_size * sizeof(*p->tx_ring));
+ if (!p->tx_ring) {
+ xve_warn(priv, "failed to allocate tx ring\n");
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+
+ p->qp = xve_cm_create_tx_qp(p->netdev, p);
+ if (IS_ERR(p->qp)) {
+ ret = PTR_ERR(p->qp);
+ xve_warn(priv, "failed to allocate tx qp: %d\n", ret);
+ goto err_qp;
+ }
+
+ p->id = ib_create_cm_id(priv->ca, xve_cm_tx_handler, p);
+ if (IS_ERR(p->id)) {
+ ret = PTR_ERR(p->id);
+ xve_warn(priv, "failed to create tx cm id: %d\n", ret);
+ goto err_id;
+ }
+
+ ret = xve_cm_modify_tx_init(p->netdev, p->id, p->qp);
+ if (ret) {
+ xve_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
+ goto err_modify;
+ }
+
+ ret = xve_cm_send_req(p->netdev, p->id, p->qp, pathrec);
+ if (ret) {
+ xve_warn(priv, "failed to send cm req: %d\n", ret);
+ goto err_send_cm;
+ }
+
+ xve_debug(DEBUG_CM_INFO, priv, "%s Request connection", __func__);
+ xve_debug(DEBUG_CM_INFO, priv, "0x%x for gid", p->qp->qp_num);
+ xve_debug(DEBUG_CM_INFO, priv, "%pI6 net_id 0x%x\n", pathrec->dgid.raw,
+ priv->net_id);
+
+ return 0;
+
+err_send_cm:
+err_modify:
+ ib_destroy_cm_id(p->id);
+err_id:
+ p->id = NULL;
+ ib_destroy_qp(p->qp);
+err_qp:
+ p->qp = NULL;
+ vfree(p->tx_ring);
+err_tx:
+ return ret;
+}
+
+static void xve_cm_tx_destroy(struct xve_cm_ctx *p)
+{
+ struct xve_dev_priv *priv = netdev_priv(p->netdev);
+ struct xve_cm_buf *tx_req;
+ unsigned long begin;
+ unsigned long flags = 0;
+
+ xve_debug(DEBUG_CM_INFO, priv, "%s Destroy active conn", __func__);
+ xve_debug(DEBUG_CM_INFO, priv, "0x%x head", p->qp ? p->qp->qp_num : 0);
+ xve_debug(DEBUG_CM_INFO, priv, " 0x%x tail 0x%x\n", p->tx_head,
+ p->tx_tail);
+ if (p->id)
+ ib_destroy_cm_id(p->id);
+
+ if (p->tx_ring) {
+ /* Wait for all sends to complete */
+ if (!netif_carrier_ok(priv->netdev)
+ && unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ while (poll_tx(priv)); /* nothing */
+
+ begin = jiffies;
+ while ((int)p->tx_tail - (int)p->tx_head < 0) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ xve_warn(priv,
+ "timing out; %d sends not completed\n",
+ p->tx_head - p->tx_tail);
+ goto timeout;
+ }
+
+ msleep(20);
+ }
+ }
+
+timeout:
+
+ spin_lock_irqsave(&priv->lock, flags);
+ while ((int)p->tx_tail - (int)p->tx_head < 0) {
+ tx_req = &p->tx_ring[p->tx_tail & (xve_sendq_size - 1)];
+ ++p->tx_tail;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ xve_cm_tx_buf_free(priv, tx_req);
+ netif_tx_lock_bh(p->netdev);
+ if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+ netif_queue_stopped(p->netdev) &&
+ test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+ priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+ netif_wake_queue(p->netdev);
+ }
+ netif_tx_unlock_bh(p->netdev);
+
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ pr_info("%s [xve %s] Destroyed active con", __func__, priv->xve_name);
+ pr_info("qp [0x%x] head", p->qp ? p->qp->qp_num : 0);
+ pr_info("0x%x tail 0x%x\n", p->tx_head, p->tx_tail);
+ if (p->qp)
+ ib_destroy_qp(p->qp);
+ vfree(p->tx_ring);
+ kfree(p);
+}
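+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * the "(int)tx_tail - (int)tx_head < 0" test above counts outstanding
+ * sends even after the unsigned counters wrap, as long as fewer than
+ * 2^31 sends are ever in flight.  For head 0x80000002 and tail
+ * 0x80000000 the signed difference is -2, i.e. two sends still pending.
+ */
+static inline int xve_example_sends_pending(u32 head, u32 tail)
+{
+ return (int)tail - (int)head < 0;
+}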
+
+static int xve_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+ struct xve_cm_ctx *tx = cm_id->context;
+ struct xve_dev_priv *priv;
+ struct net_device *dev;
+ int ret;
+
+ if (tx == NULL) {
+ pr_info("XVE: %s qpn %d Event %d\n", __func__,
+ cm_id->remote_cm_qpn, event->event);
+ return 0;
+ }
+
+ priv = netdev_priv(tx->netdev);
+ dev = priv->netdev;
+ switch (event->event) {
+ case IB_CM_DREQ_RECEIVED:
+ xve_debug(DEBUG_CM_INFO, priv, "%s DREQ received QP %x\n",
+ __func__, tx->qp ? tx->qp->qp_num : 0);
+
+ ib_send_cm_drep(cm_id, NULL, 0);
+ break;
+ case IB_CM_REP_RECEIVED:
+ xve_debug(DEBUG_CM_INFO, priv, "%s REP received QP %x\n",
+ __func__, tx->qp ? tx->qp->qp_num : 0);
+ ret = xve_cm_rep_handler(cm_id, event);
+ if (ret)
+ ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ NULL, 0, NULL, 0);
+ break;
+ case IB_CM_REQ_ERROR:
+ case IB_CM_REJ_RECEIVED:
+ case IB_CM_TIMEWAIT_EXIT:
+ pr_info("%s CM event %d [dev %s] QP %x\n", __func__,
+ event->event, dev->name, tx->qp ? tx->qp->qp_num : 0);
+ netif_tx_lock_bh(dev);
+ /*
+ * Should we delete all L2 entries XXX
+ */
+ xve_cm_destroy_tx_deferred(tx);
+ netif_tx_unlock_bh(dev);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+struct xve_cm_ctx *xve_cm_create_tx(struct net_device *dev,
+ struct xve_path *path)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_cm_ctx *tx;
+
+ tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
+ if (!tx)
+ return NULL;
+
+ xve_cm_set(path, tx);
+ strncpy(tx->version, XSIGO_LOCAL_VERSION, 60);
+ tx->direction = XVE_CM_ESTD_TX;
+ tx->path = path;
+ tx->netdev = dev;
+ list_add(&tx->list, &priv->cm.start_list);
+ set_bit(XVE_FLAG_INITIALIZED, &tx->flags);
+ xve_queue_work(priv, XVE_WQ_START_CMTXSTART);
+ return tx;
+}
+
+void xve_cm_destroy_tx_deferred(struct xve_cm_ctx *tx)
+{
+ struct xve_dev_priv *priv = netdev_priv(tx->netdev);
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ clear_bit(XVE_FLAG_OPER_UP, &tx->flags);
+ if (test_and_clear_bit(XVE_FLAG_INITIALIZED, &tx->flags)) {
+ list_move(&tx->list, &priv->cm.reap_list);
+ xve_cm_set(tx->path, NULL);
+ xve_queue_work(priv, XVE_WQ_START_CMTXREAP);
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void xve_cm_tx_start(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_CMTXSTART, 0);
+ struct net_device *dev = priv->netdev;
+ struct xve_cm_ctx *p;
+ unsigned long flags;
+ int ret;
+ struct ib_sa_path_rec pathrec;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
+ while (!list_empty(&priv->cm.start_list)) {
+ p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+ list_del_init(&p->list);
+ memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
+ ret = xve_cm_tx_init(p, &pathrec);
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+ xve_put_ctx(priv);
+
+}
+
+static void __xve_cm_tx_reap(struct xve_dev_priv *priv)
+{
+ struct net_device *dev = priv->netdev;
+ struct xve_cm_ctx *p;
+ unsigned long flags;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
+ while (!list_empty(&priv->cm.reap_list)) {
+ p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+ list_del(&p->list);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+ /*
+ * Destroy path
+ */
+ if (p->path)
+ xve_flush_single_path_by_gid(dev,
+ &p->path->pathrec.dgid);
+ xve_cm_tx_destroy(p);
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+}
+
+void xve_cm_tx_reap(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_CMTXREAP, 0);
+ __xve_cm_tx_reap(priv);
+ xve_put_ctx(priv);
+}
+
+void xve_cm_rx_reap(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_CMRXREAP, 0);
+
+ xve_cm_free_rx_reap_list(priv->netdev);
+ xve_put_ctx(priv);
+}
+
+void xve_cm_stale_task(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_CMSTALE, 2);
+ struct xve_cm_ctx *p;
+ int ret;
+
+ spin_lock_irq(&priv->lock);
+ while (!list_empty(&priv->cm.passive_ids)) {
+ /* List is sorted by LRU, start from tail,
+ * stop when we see a recently used entry */
+ p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
+ if (time_before_eq(jiffies, p->jiffies + XVE_CM_RX_TIMEOUT))
+ break;
+ list_move(&p->list, &priv->cm.rx_error_list);
+ p->state = XVE_CM_RX_ERROR;
+ spin_unlock_irq(&priv->lock);
+ ret = ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE);
+ if (ret)
+ xve_warn(priv, "unable to move qp to error state: %d\n",
+ ret);
+ spin_lock_irq(&priv->lock);
+ }
+
+ if (!list_empty(&priv->cm.passive_ids))
+ xve_queue_complete_work(priv, XVE_WQ_START_CMSTALE,
+ XVE_CM_RX_DELAY);
+
+ spin_unlock_irq(&priv->lock);
+}
+
+static void xve_cm_create_srq(struct net_device *dev, int max_sge)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_srq_init_attr srq_init_attr = {
+ .attr = {
+ .max_wr = xve_recvq_size,
+ .max_sge = max_sge}
+ };
+
+ priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+ if (IS_ERR(priv->cm.srq)) {
+ if (PTR_ERR(priv->cm.srq) != -ENOSYS)
+ pr_warn("%s: failed to allocate SRQ, error %ld\n",
+ priv->ca->name, PTR_ERR(priv->cm.srq));
+ priv->cm.srq = NULL;
+ return;
+ }
+
+ priv->cm.srq_ring =
+ vzalloc(xve_recvq_size * sizeof(*priv->cm.srq_ring));
+ if (!priv->cm.srq_ring) {
+ pr_warn("%s: failed to allocate CM SRQ ring (%d entries)\n",
+ priv->ca->name, xve_recvq_size);
+ ib_destroy_srq(priv->cm.srq);
+ priv->cm.srq = NULL;
+ return;
+ }
+}
+
+int xve_cm_dev_init(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int i, ret;
+ struct ib_device_attr attr;
+
+ INIT_LIST_HEAD(&priv->cm.passive_ids);
+ INIT_LIST_HEAD(&priv->cm.reap_list);
+ INIT_LIST_HEAD(&priv->cm.start_list);
+ INIT_LIST_HEAD(&priv->cm.rx_error_list);
+ INIT_LIST_HEAD(&priv->cm.rx_flush_list);
+ INIT_LIST_HEAD(&priv->cm.rx_drain_list);
+ INIT_LIST_HEAD(&priv->cm.rx_reap_list);
+
+ ret = ib_query_device(priv->ca, &attr);
+ if (ret) {
+ pr_warn("ib_query_device() failed with %d\n", ret);
+ return ret;
+ }
+
+ /* Based on the admin mtu from the chassis */
+ attr.max_srq_sge =
+ min_t(int,
+ ALIGN((priv->admin_mtu + VLAN_ETH_HLEN),
+ PAGE_SIZE) / PAGE_SIZE, attr.max_srq_sge);
+ xve_debug(DEBUG_CM_INFO, priv, "%s max_srq_sge=%d\n", __func__,
+ attr.max_srq_sge);
+
+ xve_cm_create_srq(dev, attr.max_srq_sge);
+ if (xve_cm_has_srq(dev)) {
+ priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x20;
+ priv->cm.num_frags = attr.max_srq_sge;
+ xve_debug(DEBUG_CM_INFO, priv,
+ "%s max_cm_mtu = 0x%x, num_frags=%d\n", __func__,
+ priv->cm.max_cm_mtu, priv->cm.num_frags);
+ } else {
+ pr_notice("XVE: Non-SRQ mode not supported\n");
+ return -ENOTSUPP;
+ }
+
+ xve_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
+
+ if (xve_cm_has_srq(dev)) {
+ for (i = 0; i < xve_recvq_size; ++i) {
+ if (!xve_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
+ priv->cm.num_frags - 1,
+ priv->cm.srq_ring[i].mapping)) {
+ xve_warn(priv,
+ "%s failed to allocate rc ",
+ __func__);
+ xve_warn(priv,
+ "receive buffer %d\n", i);
+ xve_cm_dev_cleanup(dev);
+ return -ENOMEM;
+ }
+
+ if (xve_cm_post_receive_srq(dev, i)) {
+ xve_warn(priv, "xve_cm_post_receive_srq ");
+ xve_warn(priv, "failed for buf %d\n", i);
+ xve_cm_dev_cleanup(dev);
+ return -EIO;
+ }
+ }
+ }
+
+ return 0;
+}
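+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * assuming a 9000-byte admin MTU and 4K pages,
+ * ALIGN(9000 + VLAN_ETH_HLEN, PAGE_SIZE) is 12288, i.e. 3 SGEs per
+ * SRQ WR, giving max_cm_mtu = 3 * 4096 - 0x20 = 12256 bytes of RC
+ * payload as computed in xve_cm_dev_init() above.
+ */
+static inline int xve_example_srq_sge(int admin_mtu)
+{
+ return ALIGN(admin_mtu + VLAN_ETH_HLEN, PAGE_SIZE) / PAGE_SIZE;
+}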
+
+void xve_cm_dev_cleanup(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+ if (!priv->cm.srq)
+ return;
+
+ xve_debug(DEBUG_CM_INFO, priv, "%s Cleanup xve CM\n", __func__);
+
+ ret = ib_destroy_srq(priv->cm.srq);
+ if (ret)
+ xve_warn(priv, "ib_destroy_srq failed: %d\n", ret);
+
+ priv->cm.srq = NULL;
+ if (!priv->cm.srq_ring)
+ return;
+
+ xve_cm_free_rx_ring(dev, priv->cm.srq_ring);
+ priv->cm.srq_ring = NULL;
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _XVE_COMPAT_H
+#define _XVE_COMPAT_H
+#include "xve.h"
+#define XVE_OP_CM (1ul << 30)
+
+#include <net/icmp.h>
+static inline void skb_pkt_type(struct sk_buff *skb, unsigned char type)
+{
+ skb->pkt_type = type;
+}
+
+static inline void xve_dev_set_mtu(struct net_device *dev, int mtu)
+{
+ rtnl_lock();
+ dev_set_mtu(dev, mtu);
+ rtnl_unlock();
+}
+
+static inline void xg_skb_push(struct sk_buff *skb, unsigned int len)
+{
+ skb_push(skb, len);
+}
+
+static inline unsigned xve_random32(struct xve_dev_priv *priv)
+{
+ return (prandom_u32() & 0xffffff);
+}
+
+static inline struct proc_dir_entry *xg_create_proc_entry(const char *name,
+ mode_t mode,
+ struct proc_dir_entry
+ *parent, char root)
+{
+ struct proc_dir_entry *proc_dir = NULL;
+
+ if (mode == S_IFDIR)
+ proc_dir = proc_mkdir(name, parent);
+
+ return proc_dir;
+
+}
+
+static inline void xg_remove_proc_entry(const char *name,
+ struct proc_dir_entry *parent)
+{
+ return remove_proc_entry(name, parent);
+}
+
+#endif /* _XVE_COMPAT_H */
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+
+static void xve_get_drvinfo(struct net_device *netdev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ strncpy(drvinfo->driver, "xve", sizeof(drvinfo->driver) - 1);
+ strncpy(drvinfo->version, XVE_DRIVER_VERSION,
+ sizeof(drvinfo->version) - 1);
+ strncpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version) - 1);
+ strncpy(drvinfo->bus_info, "N/A", sizeof(drvinfo->bus_info) - 1);
+}
+
+static int xve_get_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+ coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+ coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+ coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+
+ return 0;
+}
+
+static int xve_set_coalesce(struct net_device *dev,
+ struct ethtool_coalesce *coal)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+ /*
+ * Since Xve uses a single CQ for both rx and tx, we assume
+ * that rx params dictate the configuration. These values are
+ * saved in the private data and returned when xve_get_coalesce()
+ * is called.
+ */
+ if (coal->rx_coalesce_usecs > 0xffff ||
+ coal->rx_max_coalesced_frames > 0xffff)
+ return -EINVAL;
+
+ if (coal->rx_max_coalesced_frames | coal->rx_coalesce_usecs) {
+ if (!coal->rx_max_coalesced_frames)
+ coal->rx_max_coalesced_frames = 0xffff;
+ else if (!coal->rx_coalesce_usecs)
+ coal->rx_coalesce_usecs = 0xffff;
+ }
+
+ ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
+ coal->rx_coalesce_usecs);
+
+ if (ret && ret != -ENOSYS) {
+ xve_warn(priv, "failed modifying CQ (%d)\n", ret);
+ return ret;
+ }
+
+ coal->tx_coalesce_usecs = coal->rx_coalesce_usecs;
+ coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
+ priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
+ priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
+
+ return 0;
+}
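+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * the normalization above means "rx-usecs 10" alone becomes
+ * (10 usecs, 0xffff frames), i.e. moderation purely on time, while
+ * "rx-frames 32" alone becomes (0xffff usecs, 32 frames); both values
+ * are capped at 16 bits because that is all ib_modify_cq() accepts.
+ */
+static inline void xve_example_normalize_coal(u32 *usecs, u32 *frames)
+{
+ if (*frames | *usecs) {
+ if (!*frames)
+ *frames = 0xffff;
+ else if (!*usecs)
+ *usecs = 0xffff;
+ }
+}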
+
+static const char xve_stats_keys[][ETH_GSTRING_LEN] = {
+ "rx_packets", "rx_bytes", "rx_errors", "rx_drops",
+ "tx_packets", "tx_bytes", "tx_errors", "tx_drops",
+ "LRO aggregated", "LRO flushed",
+ "LRO avg aggr", "LRO no desc"
+};
+
+static void xve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+ switch (stringset) {
+ case ETH_SS_STATS:
+ memcpy(data, *xve_stats_keys, sizeof(xve_stats_keys));
+ break;
+ }
+}
+
+static int xve_get_sset_count(struct net_device *dev, int sset)
+{
+ switch (sset) {
+ case ETH_SS_STATS:
+ return ARRAY_SIZE(xve_stats_keys);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static void xve_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats, uint64_t *data)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int index = 0;
+
+ /* Generic netdev stats first, then the LRO statistics */
+ data[index++] = dev->stats.rx_packets;
+ data[index++] = dev->stats.rx_bytes;
+ data[index++] = dev->stats.rx_errors;
+ data[index++] = dev->stats.rx_dropped;
+
+ data[index++] = dev->stats.tx_packets;
+ data[index++] = dev->stats.tx_bytes;
+ data[index++] = dev->stats.tx_errors;
+ data[index++] = dev->stats.tx_dropped;
+
+ data[index++] = priv->lro.lro_mgr.stats.aggregated;
+ data[index++] = priv->lro.lro_mgr.stats.flushed;
+ if (priv->lro.lro_mgr.stats.flushed)
+ data[index++] = priv->lro.lro_mgr.stats.aggregated /
+ priv->lro.lro_mgr.stats.flushed;
+ else
+ data[index++] = 0;
+ data[index++] = priv->lro.lro_mgr.stats.no_desc;
+}
+
+static int xve_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+ struct xve_dev_priv *xvep = netdev_priv(netdev);
+
+ ecmd->autoneg = 0;
+ ecmd->speed = SPEED_10000;
+ ecmd->duplex = DUPLEX_FULL; /* Duplex is hard coded */
+ if (netif_carrier_ok(netdev)) {
+ ecmd->speed = xvep->port_speed;
+ ecmd->advertising = ADVERTISED_10000baseT_Full;
+ ecmd->supported = SUPPORTED_10000baseT_Full |
+ SUPPORTED_FIBRE | SUPPORTED_Autoneg;
+ ecmd->port = PORT_FIBRE;
+ ecmd->transceiver = XCVR_EXTERNAL;
+
+ }
+ return 0;
+}
+
+static const struct ethtool_ops xve_ethtool_ops = {
+ .get_settings = xve_get_settings,
+ .get_drvinfo = xve_get_drvinfo,
+ .get_coalesce = xve_get_coalesce,
+ .set_coalesce = xve_set_coalesce,
+ .get_strings = xve_get_strings,
+ .get_sset_count = xve_get_sset_count,
+ .get_ethtool_stats = xve_get_ethtool_stats,
+ .get_link = ethtool_op_get_link,
+};
+
+void xve_set_ethtool_ops(struct net_device *dev)
+{
+ dev->ethtool_ops = &xve_ethtool_ops;
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#if defined(CONFIG_INFINIBAND_XVE_DEBUG)
+static struct dentry *xve_root;
+
+static void format_gid(union ib_gid *gid, char *buf)
+{
+ int i, n;
+
+ for (n = 0, i = 0; i < 8; ++i) {
+ n += sprintf(buf + n, "%x",
+ be16_to_cpu(((__be16 *) gid->raw)[i]));
+ if (i < 7)
+ buf[n++] = ':';
+ }
+}
+
+static void *xve_mcg_seq_start(struct seq_file *file, loff_t *pos)
+{
+ struct xve_mcast_iter *iter;
+ loff_t n = *pos;
+
+ iter = xve_mcast_iter_init(file->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (xve_mcast_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *xve_mcg_seq_next(struct seq_file *file, void *iter_ptr,
+ loff_t *pos)
+{
+ struct xve_mcast_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (xve_mcast_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void xve_mcg_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int xve_mcg_seq_show(struct seq_file *file, void *iter_ptr)
+{
+ struct xve_mcast_iter *iter = iter_ptr;
+ char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+ union ib_gid mgid;
+ unsigned long created;
+ unsigned int queuelen, complete, send_only;
+
+ if (!iter)
+ return 0;
+
+ xve_mcast_iter_read(iter, &mgid, &created, &queuelen,
+ &complete, &send_only);
+
+ format_gid(&mgid, gid_buf);
+
+ seq_printf(file,
+ "GID: %s\n"
+ " created: %10ld\n"
+ " queuelen: %9d\n"
+ " complete: %9s\n"
+ " send_only: %8s\n"
+ "\n",
+ gid_buf, created, queuelen,
+ complete ? "yes" : "no", send_only ? "yes" : "no");
+
+ return 0;
+}
+
+static const struct seq_operations xve_mcg_seq_ops = {
+ .start = xve_mcg_seq_start,
+ .next = xve_mcg_seq_next,
+ .stop = xve_mcg_seq_stop,
+ .show = xve_mcg_seq_show,
+};
+
+static int xve_mcg_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &xve_mcg_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private;
+
+ return 0;
+}
+
+static const struct file_operations xve_mcg_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_mcg_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+static void *xve_path_seq_start(struct seq_file *file, loff_t *pos)
+{
+ struct xve_path_iter *iter;
+ loff_t n = *pos;
+
+ iter = xve_path_iter_init(file->private);
+ if (!iter)
+ return NULL;
+
+ while (n--) {
+ if (xve_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+ }
+
+ return iter;
+}
+
+static void *xve_path_seq_next(struct seq_file *file, void *iter_ptr,
+ loff_t *pos)
+{
+ struct xve_path_iter *iter = iter_ptr;
+
+ (*pos)++;
+
+ if (xve_path_iter_next(iter)) {
+ kfree(iter);
+ return NULL;
+ }
+
+ return iter;
+}
+
+static void xve_path_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+ /* nothing for now */
+}
+
+static int xve_path_seq_show(struct seq_file *file, void *iter_ptr)
+{
+ struct xve_path_iter *iter = iter_ptr;
+ char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+ struct xve_path path;
+ int rate;
+
+ if (!iter)
+ return 0;
+
+ xve_path_iter_read(iter, &path);
+
+ format_gid(&path.pathrec.dgid, gid_buf);
+
+ seq_printf(file,
+ "GID: %s\n"
+ " complete: %6s\n",
+ gid_buf, path.pathrec.dlid ? "yes" : "no");
+
+ if (path.pathrec.dlid) {
+ rate = ib_rate_to_mult(path.pathrec.rate) * 25;
+
+ seq_printf(file,
+ " DLID: 0x%04x\n"
+ " SL: %12d\n"
+ " rate: %*d%s Gb/sec\n",
+ be16_to_cpu(path.pathrec.dlid),
+ path.pathrec.sl,
+ 10 - ((rate % 10) ? 2 : 0),
+ rate / 10, rate % 10 ? ".5" : "");
+ }
+
+ seq_putc(file, '\n');
+
+ return 0;
+}
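+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * ib_rate_to_mult() returns the multiple of the 2.5 Gb/s base rate, so
+ * multiplying by 25 as above yields tenths of Gb/s; the "%*d%s" print
+ * then shows a 4X QDR path (mult 16, value 400) as "40 Gb/sec" and a
+ * 1X SDR path (mult 1, value 25) as "2.5 Gb/sec".
+ */
+static inline int xve_example_rate_tenth_gbps(enum ib_rate rate)
+{
+ return ib_rate_to_mult(rate) * 25;
+}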
+
+static const struct seq_operations xve_path_seq_ops = {
+ .start = xve_path_seq_start,
+ .next = xve_path_seq_next,
+ .stop = xve_path_seq_stop,
+ .show = xve_path_seq_show,
+};
+
+static int xve_path_open(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq;
+ int ret;
+
+ ret = seq_open(file, &xve_path_seq_ops);
+ if (ret)
+ return ret;
+
+ seq = file->private_data;
+ seq->private = inode->i_private;
+
+ return 0;
+}
+
+static const struct file_operations xve_path_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_path_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
+void xve_create_debug_files(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ char name[IFNAMSIZ + sizeof "_path"];
+
+ snprintf(name, sizeof(name), "%s_mcg", dev->name);
+ priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+ xve_root, dev, &xve_mcg_fops);
+ if (!priv->mcg_dentry)
+ xve_warn(priv, "failed to create mcg debug file\n");
+
+ snprintf(name, sizeof(name), "%s_path", dev->name);
+ priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+ xve_root, dev, &xve_path_fops);
+ if (!priv->path_dentry)
+ xve_warn(priv, "failed to create path debug file\n");
+}
+
+void xve_delete_debug_files(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ if (priv->mcg_dentry != NULL)
+ debugfs_remove(priv->mcg_dentry);
+ if (priv->path_dentry != NULL)
+ debugfs_remove(priv->path_dentry);
+}
+
+int xve_register_debugfs(void)
+{
+ xve_root = debugfs_create_dir("xve", NULL);
+ return xve_root ? 0 : -ENOMEM;
+}
+
+void xve_unregister_debugfs(void)
+{
+ debugfs_remove(xve_root);
+}
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static DEFINE_MUTEX(pkey_mutex);
+
+struct xve_ah *xve_create_ah(struct net_device *dev,
+ struct ib_pd *pd, struct ib_ah_attr *attr)
+{
+ struct xve_ah *ah;
+
+ ah = kmalloc(sizeof(*ah), GFP_KERNEL);
+ if (!ah)
+ return NULL;
+
+ ah->dev = dev;
+ ah->last_send = 0;
+ kref_init(&ah->ref);
+
+ ah->ah = ib_create_ah(pd, attr);
+ if (IS_ERR(ah->ah)) {
+ kfree(ah);
+ ah = NULL;
+ } else
+ xve_debug(DEBUG_MCAST_INFO, netdev_priv(dev),
+ "%s Created ah %p\n", __func__, ah->ah);
+
+ return ah;
+}
+
+void xve_free_ah(struct kref *kref)
+{
+ struct xve_ah *ah = container_of(kref, struct xve_ah, ref);
+ struct xve_dev_priv *priv = netdev_priv(ah->dev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ list_add_tail(&ah->list, &priv->dead_ahs);
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
+ u64 mapping[XVE_UD_RX_SG])
+{
+ if (xve_ud_need_sg(priv->max_ib_mtu)) {
+ ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
+ DMA_FROM_DEVICE);
+ ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+ } else
+ ib_dma_unmap_single(priv->ca, mapping[0],
+ XVE_UD_BUF_SIZE(priv->max_ib_mtu),
+ DMA_FROM_DEVICE);
+}
+
+static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
+ struct sk_buff *skb, unsigned int length)
+{
+ if (xve_ud_need_sg(priv->max_ib_mtu)) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+ unsigned int size;
+ /*
+ * Only two buffers are needed for a 4K max payload:
+ * the first buffer is XVE_UD_HEAD_SIZE bytes.
+ */
+ skb->tail += XVE_UD_HEAD_SIZE;
+ skb->len += length;
+
+ size = length - XVE_UD_HEAD_SIZE;
+
+ frag->size = size;
+ skb->data_len += size;
+ skb->truesize += size;
+ } else {
+ skb_put(skb, length);
+ }
+
+}
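+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * in the SG case a completion of 'length' bytes keeps the first
+ * XVE_UD_HEAD_SIZE bytes in the linear area and moves the rest into
+ * the single page fragment, which is exactly the frag->size set above.
+ */
+static inline unsigned int xve_example_ud_frag_len(unsigned int length)
+{
+ return length - XVE_UD_HEAD_SIZE;
+}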
+
+static int xve_ib_post_receive(struct net_device *dev, int id)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_recv_wr *bad_wr;
+ int ret;
+
+ priv->rx_wr.wr_id = id | XVE_OP_RECV;
+ priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
+ priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
+
+ ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
+ if (unlikely(ret)) {
+ xve_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+ xve_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
+ dev_kfree_skb_any(priv->rx_ring[id].skb);
+ priv->rx_ring[id].skb = NULL;
+ }
+
+ return ret;
+}
+
+static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct sk_buff *skb;
+ int buf_size;
+ u64 *mapping;
+
+ if (xve_ud_need_sg(priv->max_ib_mtu))
+ buf_size = XVE_UD_HEAD_SIZE;
+ else
+ buf_size = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
+
+ skb = xve_dev_alloc_skb(priv, buf_size + 10);
+ if (unlikely(!skb))
+ return NULL;
+
+ /*
+ * Eth header is 14 bytes, IB will leave a 40 byte gap for a GRH
+ * so we need 10 more bytes to get to 64 and align the
+ * IP header to a multiple of 16.
+ */
+ skb_reserve(skb, 10);
+
+ mapping = priv->rx_ring[id].mapping;
+ mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
+ DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
+ goto error;
+
+ if (xve_ud_need_sg(priv->max_ib_mtu)) {
+ struct page *page = xve_alloc_page(GFP_ATOMIC);
+
+ if (!page)
+ goto partial_error;
+ skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
+ mapping[1] =
+ ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page.p,
+ 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
+ goto partial_error;
+ }
+
+ priv->rx_ring[id].skb = skb;
+ return skb;
+
+partial_error:
+ ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
+error:
+ dev_kfree_skb_any(skb);
+ return NULL;
+}
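+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * the 10-byte reserve above plus the 40-byte GRH and 14-byte Ethernet
+ * header add up to 64, so the IP header that follows starts 16-byte
+ * aligned in the receive buffer.
+ */
+static inline int xve_example_rx_ip_offset(void)
+{
+ return 10 + IB_GRH_BYTES + ETH_HLEN; /* = 64 */
+}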
+
+static int xve_ib_post_receives(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int i;
+
+ for (i = 0; i < xve_recvq_size; ++i) {
+ if (!xve_alloc_rx_skb(dev, i)) {
+ xve_warn(priv,
+ "%s failed to allocate ib receive buffer %d\n",
+ __func__, i);
+ return -ENOMEM;
+ }
+ if (xve_ib_post_receive(dev, i)) {
+ xve_warn(priv,
+ "%s xve_ib_post_receive failed for buf %d\n",
+ __func__, i);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ unsigned int wr_id = wc->wr_id & ~XVE_OP_RECV;
+ struct sk_buff *skb;
+ u64 mapping[XVE_UD_RX_SG];
+ struct ib_packed_grh *grhhdr;
+ char *smac;
+ u16 vlan;
+
+ xve_dbg_data(priv, "recv completion: id %d, status: %d\n",
+ wr_id, wc->status);
+
+ if (unlikely(wr_id >= xve_recvq_size)) {
+ xve_warn(priv, "recv completion event with wrid %d (> %d)\n",
+ wr_id, xve_recvq_size);
+ return;
+ }
+
+ skb = priv->rx_ring[wr_id].skb;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ xve_warn(priv, "failed recv event ");
+ xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ }
+ xve_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
+ dev_kfree_skb_any(skb);
+ priv->rx_ring[wr_id].skb = NULL;
+ return;
+ }
+
+ /*
+ * Drop packets that this interface sent, i.e. multicast packets
+ * that the HCA has replicated.
+ */
+ if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+ goto repost;
+
+ memcpy(mapping, priv->rx_ring[wr_id].mapping,
+ XVE_UD_RX_SG * sizeof(*mapping));
+
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ if (unlikely(!xve_alloc_rx_skb(dev, wr_id))) {
+ INC_RX_DROP_STATS(priv, dev);
+ goto repost;
+ }
+
+ xve_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+ wc->byte_len, wc->slid);
+
+ xve_ud_dma_unmap_rx(priv, mapping);
+ xve_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+ grhhdr = (struct ib_packed_grh *)(skb->data);
+ smac = skb->data + IB_GRH_BYTES + ETH_ALEN;
+ skb_pull(skb, IB_GRH_BYTES);
+ vlan = xg_vlan_get_rxtag(skb);
+ xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, smac, vlan);
+ xve_prepare_skb(priv, skb);
+
+ xve_test("%s RX UD pkt %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+ __func__, skb->data[0], skb->data[1], skb->data[2],
+ skb->data[3], skb->data[4], skb->data[5], skb->data[6],
+ skb->data[7], skb->data[8]);
+ xve_test("%02x %02x %02x proto %x for %s\n",
+ skb->data[9], skb->data[10], skb->data[11],
+ skb->protocol, priv->xve_name);
+ xve_send_skb(priv, skb);
+repost:
+ if (unlikely(xve_ib_post_receive(dev, wr_id))) {
+ xve_warn(priv, "xve_ib_post_receive failed ");
+ xve_warn(priv, "for buf %d\n", wr_id);
+ }
+}
+
+static int xve_dma_map_tx(struct ib_device *ca, struct xve_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int i;
+ int off;
+
+ if (skb_headlen(skb)) {
+ mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+ return -EIO;
+
+ off = 1;
+ } else
+ off = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ mapping[i + off] = ib_dma_map_page(ca, skb_frag_page(frag),
+ frag->page_offset,
+ frag->size, DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
+ goto partial_error;
+ }
+ return 0;
+
+partial_error:
+ for (; i > 0; --i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+ ib_dma_unmap_page(ca, mapping[i - !off], frag->size,
+ DMA_TO_DEVICE);
+ }
+
+ if (off)
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+ DMA_TO_DEVICE);
+
+ return -EIO;
+}
+
+static void xve_dma_unmap_tx(struct ib_device *ca, struct xve_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int i;
+ int off;
+
+ if (skb_headlen(skb)) {
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+ DMA_TO_DEVICE);
+ off = 1;
+ } else
+ off = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ ib_dma_unmap_page(ca, mapping[i + off], frag->size,
+ DMA_TO_DEVICE);
+ }
+}
+
+static void xve_free_txbuf_memory(struct xve_dev_priv *priv,
+ struct xve_tx_buf *tx_req)
+{
+ if ((tx_req->skb == NULL) || (!tx_req->mapping[0]))
+ xve_debug(DEBUG_DATA_INFO, priv,
+ "%s [ca %p] tx_req skb %p mapping 0x%llx\n",
+ __func__, priv->ca, tx_req->skb, tx_req->mapping[0]);
+ else
+ xve_dma_unmap_tx(priv->ca, tx_req);
+
+ xve_dev_kfree_skb_any(priv, tx_req->skb, 1);
+ memset(tx_req, 0, sizeof(struct xve_tx_buf));
+}
+
+static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ unsigned int wr_id = wc->wr_id;
+ struct xve_tx_buf *tx_req;
+
+ xve_dbg_data(priv, "send completion: id %d, status: %d\n",
+ wr_id, wc->status);
+
+ if (unlikely(wr_id >= xve_sendq_size)) {
+ xve_warn(priv, "send completion event with wrid %d (> %d)\n",
+ wr_id, xve_sendq_size);
+ return;
+ }
+
+ tx_req = &priv->tx_ring[wr_id];
+ xve_free_txbuf_memory(priv, tx_req);
+
+ ++priv->tx_tail;
+
+ if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+ netif_queue_stopped(dev) &&
+ test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+ priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+ netif_wake_queue(dev);
+ }
+
+ if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) {
+ xve_warn(priv, "failed send event ");
+ xve_warn(priv, "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
+ }
+}
+
+int poll_tx(struct xve_dev_priv *priv)
+{
+ int n, i, tot = 0;
+
+ do {
+ n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+ /* handle multiple WC's in one call */
+ if (likely(n > 0)) {
+ for (i = 0; i < n; ++i)
+ xve_ib_handle_tx_wc(priv->netdev,
+ priv->send_wc + i);
+ tot += n;
+ } else if (n == 0) {
+ break;
+ } else {
+ xve_warn(priv, "%s ib_poll_cq() failed, rc %d\n",
+ __func__, n);
+ }
+
+ } while (n == MAX_SEND_CQE);
+
+ return tot;
+}
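+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * poll_tx() drains the send CQ in MAX_SEND_CQE-sized batches; a batch
+ * that comes back full means more completions may be queued, so the
+ * loop above polls again, while a short batch means the CQ is empty.
+ */
+static inline bool xve_example_more_tx_completions(int polled)
+{
+ return polled == MAX_SEND_CQE;
+}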
+
+static int poll_rx(struct xve_dev_priv *priv, int num_polls, int *done,
+ int flush)
+{
+ int n, i;
+
+ n = ib_poll_cq(priv->recv_cq, num_polls, priv->ibwc);
+ for (i = 0; i < n; ++i) {
+ /*
+ * Convert any successful completions to flush
+ * errors to avoid passing packets up the
+ * stack after bringing the device down.
+ */
+ if (flush && (priv->ibwc[i].status == IB_WC_SUCCESS))
+ priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
+
+ if (priv->ibwc[i].wr_id & XVE_OP_RECV) {
+ ++(*done);
+ if (priv->ibwc[i].wr_id & XVE_OP_CM)
+ xve_cm_handle_rx_wc(priv->netdev,
+ priv->ibwc + i);
+ else
+ xve_ib_handle_rx_wc(priv->netdev,
+ priv->ibwc + i);
+ } else
+ xve_cm_handle_tx_wc(priv->netdev, priv->ibwc + i);
+ }
+ return n;
+}
+
+int xve_poll(struct napi_struct *napi, int budget)
+{
+ struct xve_dev_priv *priv =
+ container_of(napi, struct xve_dev_priv, napi);
+ struct net_device *dev = priv->netdev;
+ int done, n, t;
+ unsigned long flags = 0;
+
+ done = 0;
+
+ priv->counters[XVE_NAPI_POLL_COUNTER]++;
+ /*
+ * If the interface is not operationally up, complete NAPI now
+ */
+ if (!test_bit(XVE_OPER_UP, &priv->state)) {
+ napi_complete(&priv->napi);
+ clear_bit(XVE_INTR_ENABLED, &priv->state);
+ return 0;
+ }
+
+poll_more:
+ while (done < budget) {
+ int max = (budget - done);
+
+ t = min(XVE_NUM_WC, max);
+ n = poll_rx(priv, t, &done, 0);
+ if (n != t)
+ break;
+ }
+
+ if (done < budget) {
+ if (dev->features & NETIF_F_LRO)
+ lro_flush_all(&priv->lro.lro_mgr);
+
+ napi_complete(napi);
+ clear_bit(XVE_OVER_QUOTA, &priv->state);
+ } else {
+ set_bit(XVE_OVER_QUOTA, &priv->state);
+ priv->counters[XVE_RX_QUOTA_EXCEEDED_COUNTER]++;
+ return done;
+ }
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+ test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+ test_bit(XVE_OPER_UP, &priv->state) &&
+ !test_bit(XVE_DELETING, &priv->state)) {
+ set_bit(XVE_INTR_ENABLED, &priv->state);
+ if (unlikely
+ (ib_req_notify_cq
+ (priv->recv_cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
+ && napi_reschedule(napi)) {
+ priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++;
+ spin_unlock_irqrestore(&priv->lock, flags);
+ goto poll_more;
+ }
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return done;
+}
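+
+/*
+ * Illustrative sketch (hypothetical helper, not used by the driver):
+ * rearming with IB_CQ_REPORT_MISSED_EVENTS makes ib_req_notify_cq()
+ * return a positive value when completions may have slipped in after
+ * the last poll; xve_poll() above then relies on napi_reschedule() to
+ * jump back to poll_more rather than leaving them until the next
+ * interrupt.
+ */
+static inline bool xve_example_rearm_raced(struct ib_cq *cq)
+{
+ return ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
+ IB_CQ_REPORT_MISSED_EVENTS) > 0;
+}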
+
+void xve_ib_completion(struct ib_cq *cq, void *dev_ptr)
+{
+ struct net_device *dev = dev_ptr;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_data_recv_handler(priv);
+
+}
+
+/*
+ * Data is pending, in interrupt context
+ */
+void xve_data_recv_handler(struct xve_dev_priv *priv)
+{
+
+ if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+ test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+ test_bit(XVE_OPER_UP, &priv->state) &&
+ !test_bit(XVE_DELETING, &priv->state)) {
+ priv->counters[XVE_NAPI_SCHED_COUNTER]++;
+ clear_bit(XVE_INTR_ENABLED, &priv->state);
+ napi_schedule(&priv->napi);
+ } else
+ priv->counters[XVE_NAPI_NOTSCHED_COUNTER]++;
+}
+
+void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
+{
+ struct xve_dev_priv *priv = netdev_priv((struct net_device *)dev_ptr);
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (test_bit(XVE_OPER_UP, &priv->state) &&
+ !test_bit(XVE_DELETING, &priv->state)) {
+ poll_tx(priv);
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static inline int post_send(struct xve_dev_priv *priv,
+ unsigned int wr_id,
+ struct ib_ah *address, u32 qpn,
+ struct xve_tx_buf *tx_req, void *head, int hlen)
+{
+ struct ib_send_wr *bad_wr;
+ int i, off;
+ struct sk_buff *skb = tx_req->skb;
+ skb_frag_t *frags = skb_shinfo(skb)->frags;
+ int nr_frags = skb_shinfo(skb)->nr_frags;
+ u64 *mapping = tx_req->mapping;
+
+ if (skb_headlen(skb)) {
+ priv->tx_sge[0].addr = mapping[0];
+ priv->tx_sge[0].length = skb_headlen(skb);
+ off = 1;
+ } else
+ off = 0;
+
+ for (i = 0; i < nr_frags; ++i) {
+ priv->tx_sge[i + off].addr = mapping[i + off];
+ priv->tx_sge[i + off].length = frags[i].size;
+ }
+ priv->tx_wr.num_sge = nr_frags + off;
+ priv->tx_wr.wr_id = wr_id;
+ priv->tx_wr.wr.ud.remote_qpn = qpn;
+ priv->tx_wr.wr.ud.ah = address;
+
+ if (head) {
+ priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
+ priv->tx_wr.wr.ud.header = head;
+ priv->tx_wr.wr.ud.hlen = hlen;
+ priv->tx_wr.opcode = IB_WR_LSO;
+ } else
+ priv->tx_wr.opcode = IB_WR_SEND;
+
+ return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
+}
+
+void xve_send(struct net_device *dev, struct sk_buff *skb,
+ struct xve_ah *address, u32 qpn)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_tx_buf *tx_req;
+ int hlen;
+ void *phead;
+
+ if (skb_is_gso(skb)) {
+ hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+ phead = skb->data;
+ if (unlikely(!skb_pull(skb, hlen))) {
+ xve_warn(priv,
+ "%s linear data too small dropping %ld packets %s\n",
+ __func__, dev->stats.tx_dropped, dev->name);
+ INC_TX_DROP_STATS(priv, dev);
+ INC_TX_ERROR_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ } else {
+ if (unlikely(skb->len > priv->mcast_mtu + VLAN_ETH_HLEN)) {
+ xve_warn(priv, "%s packet len %d", __func__, skb->len);
+ xve_warn(priv, "(> %d) too long to", priv->mcast_mtu);
+ xve_warn(priv, "send,dropping %ld packets %s\n",
+ dev->stats.tx_dropped, dev->name);
+ INC_TX_DROP_STATS(priv, dev);
+ INC_TX_ERROR_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ return;
+ }
+ phead = NULL;
+ hlen = 0;
+ }
+
+ xve_dbg_data(priv,
+ "%s sending packet, length=%d address=%p qpn=0x%06x\n",
+ __func__, skb->len, address, qpn);
+
+ /*
+ * We put the skb into the tx_ring _before_ we call post_send()
+ * because it's entirely possible that the completion handler will
+ * run before we execute anything after the post_send(). That
+ * means we have to make sure everything is properly recorded and
+ * our state is consistent before we call post_send().
+ */
+ tx_req = &priv->tx_ring[priv->tx_head & (xve_sendq_size - 1)];
+ tx_req->skb = skb;
+ if (unlikely(xve_dma_map_tx(priv->ca, tx_req))) {
+ INC_TX_ERROR_STATS(priv, dev);
+ dev_kfree_skb_any(tx_req->skb);
+ memset(tx_req, 0, sizeof(struct xve_tx_buf));
+ return;
+ }
+
+ if (++priv->tx_outstanding == xve_sendq_size) {
+ xve_dbg_data(priv,
+ "%s TX ring full, stopping kernel net queue\n",
+ __func__);
+ if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+ xve_warn(priv, "%s request notify on send CQ failed\n",
+ __func__);
+ priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+ netif_stop_queue(dev);
+ }
+
+ if (unlikely(post_send(priv, priv->tx_head & (xve_sendq_size - 1),
+ address->ah, qpn, tx_req, phead, hlen))) {
+ xve_warn(priv, "%s post_send failed\n", __func__);
+ INC_TX_ERROR_STATS(priv, dev);
+ --priv->tx_outstanding;
+ priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+ xve_free_txbuf_memory(priv, tx_req);
+ if (netif_queue_stopped(dev)) {
+ priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+ netif_wake_queue(dev);
+ }
+ } else {
+ address->last_send = priv->tx_head;
+ ++priv->tx_head;
+ skb_orphan(skb);
+ }
+ priv->send_hbeat_flag = 0;
+ if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+ poll_tx(priv);
+}
+
+static void __xve_reap_ah(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_ah *ah, *tah;
+ LIST_HEAD(remove_list);
+ unsigned long flags = 0;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
+ list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
+ if ((int)priv->tx_tail - (int)ah->last_send >= 0) {
+ list_del(&ah->list);
+ ib_destroy_ah(ah->ah);
+ kfree(ah);
+ }
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+}
+
+void xve_reap_ah(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_AHREAP, 1);
+ struct net_device *dev = priv->netdev;
+
+ __xve_reap_ah(dev);
+
+ /* STOP_REAPER is set in xve_stop */
+ if (!test_bit(XVE_STOP_REAPER, &priv->flags))
+ xve_queue_dwork(priv, XVE_WQ_START_AHREAP,
+ round_jiffies_relative(HZ));
+ xve_put_ctx(priv);
+}
+
+static void xve_ah_dev_cleanup(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ unsigned long begin;
+
+ begin = jiffies;
+
+ while (!list_empty(&priv->dead_ahs)) {
+ __xve_reap_ah(dev);
+
+ if (time_after(jiffies, begin + HZ)) {
+ xve_warn(priv,
+ "timing out; will leak address handles\n");
+ break;
+ }
+
+ msleep(20);
+ }
+}
+
+static void xve_pkey_dev_check_presence(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ u16 pkey_index = 0;
+
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+ clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+ else
+ set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+}
+
+int xve_ib_dev_up(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s Bring up ib_dev\n", __func__);
+ xve_pkey_dev_check_presence(dev);
+ if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s PKEY is not assigned\n",
+ __func__);
+ return 0;
+ }
+
+ set_bit(XVE_FLAG_OPER_UP, &priv->flags);
+
+ return xve_mcast_start_thread(dev);
+}
+
+int xve_ib_dev_down(struct net_device *dev, int flush)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s downing ib_dev\n", __func__);
+ if (!test_and_clear_bit(XVE_FLAG_OPER_UP, &priv->flags)) {
+ xve_debug(DEBUG_IBDEV_INFO, priv,
+ "%s Down IB without being up\n", __func__);
+ return 0;
+ }
+
+ netif_carrier_off(priv->netdev);
+
+ /* Shutdown the P_Key thread if still active */
+ if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+ mutex_lock(&pkey_mutex);
+ set_bit(XVE_PKEY_STOP, &priv->flags);
+ mutex_unlock(&pkey_mutex);
+ }
+
+ xve_mcast_stop_thread(dev, flush);
+ xve_mcast_dev_flush(dev);
+
+ xve_flush_paths(dev);
+
+ return 0;
+}
+
+static int recvs_pending(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int pending = 0;
+ int i;
+
+ for (i = 0; i < xve_recvq_size; ++i)
+ if (priv->rx_ring[i].skb)
+ ++pending;
+
+ return pending;
+}
+
+void xve_drain_cq(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int n, done = 0;
+
+ /*
+ * We call completion handling routines that expect to be
+ * called from the BH-disabled NAPI poll context, so disable
+ * BHs here too.
+ */
+ local_bh_disable();
+
+ do {
+ n = poll_rx(priv, XVE_NUM_WC, &done, 1);
+ } while (n == XVE_NUM_WC);
+
+ poll_tx(priv);
+ local_bh_enable();
+}
+
+int xve_ib_dev_open(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret;
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s Open ib_dev\n", __func__);
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+ xve_warn(priv, "%s P_Key 0x%04x not found\n", __func__,
+ priv->pkey);
+ clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+ return -1;
+ }
+ set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+
+ ret = xve_init_qp(dev);
+ if (ret != 0) {
+ xve_warn(priv, "%s xve_init_qp returned %d\n", __func__, ret);
+ return -1;
+ }
+
+ ret = xve_ib_post_receives(dev);
+ if (ret != 0) {
+ xve_warn(priv, "%s xve_ib_post_receives returned %d\n",
+ __func__, ret);
+ xve_ib_dev_stop(dev, 1);
+ return -1;
+ }
+
+ ret = xve_cm_dev_open(dev);
+ if (ret != 0) {
+ xve_warn(priv, "%s xve_cm_dev_open returned %d\n", __func__,
+ ret);
+ xve_ib_dev_stop(dev, 1);
+ return -1;
+ }
+
+ clear_bit(XVE_STOP_REAPER, &priv->flags);
+ xve_queue_dwork(priv, XVE_WQ_START_AHREAP,
+ 3 * round_jiffies_relative(HZ));
+
+ if (!test_and_set_bit(XVE_FLAG_INITIALIZED, &priv->flags))
+ napi_enable(&priv->napi);
+
+ /* Set IB Dev to open */
+ set_bit(XVE_IB_DEV_OPEN, &priv->flags);
+
+ return 0;
+}
+
+int xve_ib_dev_stop(struct net_device *dev, int flush)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr qp_attr;
+ unsigned long begin;
+ struct xve_tx_buf *tx_req;
+ int i;
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s Stop ib_dev\n", __func__);
+ /* IB Dev stop */
+ if (!test_and_clear_bit(XVE_IB_DEV_OPEN, &priv->flags)) {
+ xve_debug(DEBUG_IBDEV_INFO, priv,
+ "%s Stop IB without being up\n", __func__);
+ return 0;
+ }
+
+ if (test_and_clear_bit(XVE_FLAG_INITIALIZED, &priv->flags))
+ napi_disable(&priv->napi);
+
+ xve_cm_dev_stop(dev);
+
+ /*
+ * Move our QP to the error state and then reinitialize it
+ * when all work requests have completed or have been flushed.
+ */
+ qp_attr.qp_state = IB_QPS_ERR;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ xve_warn(priv, "Failed to modify QP to ERROR state\n");
+
+ /* Wait for all sends and receives to complete */
+ begin = jiffies;
+
+ while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ xve_warn(priv,
+ "%s timing out; %d sends %d receives not completed\n",
+ __func__, priv->tx_head - priv->tx_tail,
+ recvs_pending(dev));
+
+ /*
+ * assume the HW is wedged and just free up
+ * all our pending work requests.
+ */
+ while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+ tx_req = &priv->tx_ring[priv->tx_tail &
+ (xve_sendq_size - 1)];
+ xve_free_txbuf_memory(priv, tx_req);
+ ++priv->tx_tail;
+ --priv->tx_outstanding;
+ }
+
+ for (i = 0; i < xve_recvq_size; ++i) {
+ struct xve_rx_buf *rx_req;
+
+ rx_req = &priv->rx_ring[i];
+ if (!rx_req->skb)
+ continue;
+ xve_ud_dma_unmap_rx(priv,
+ priv->rx_ring[i].mapping);
+ xve_dev_kfree_skb_any(priv, rx_req->skb, 0);
+ rx_req->skb = NULL;
+ }
+
+ goto timeout;
+ }
+ xve_drain_cq(dev);
+ msleep(20);
+ }
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s All sends and receives done\n",
+ __func__);
+timeout:
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ xve_warn(priv, "Failed to modify QP to RESET state\n");
+
+ /* Wait for all AHs to be reaped */
+ set_bit(XVE_STOP_REAPER, &priv->flags);
+ xve_ah_dev_cleanup(dev);
+
+ ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+ return 0;
+}
+
+int xve_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ priv->ca = ca;
+ priv->port = port;
+ priv->qp = NULL;
+
+ if (xve_transport_dev_init(dev, ca) != 0) {
+ pr_warn("%s: xve_transport_dev_init failed for %s\n",
+ ca->name, priv->xve_name);
+ return -ENODEV;
+ }
+
+ if (dev->flags & IFF_UP) {
+ if (xve_ib_dev_open(dev) != 0) {
+ xve_transport_dev_cleanup(dev);
+ return -ENODEV;
+ }
+ }
+
+ return 0;
+}
+
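+/*
+ * Flush handler shared by the light/normal/heavy flush work items:
+ * a light flush invalidates cached paths and flushes the multicast
+ * list, a normal flush brings the IB side of the device down, and a
+ * heavy flush re-validates the P_Key index and restarts the IB device
+ * (stop + open) as well. XVE_FLAG_IB_EVENT is set at the end so the
+ * state machine reruns the multicast join.
+ */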
+static void __xve_ib_dev_flush(struct xve_dev_priv *priv,
+ enum xve_flush_level level)
+{
+ struct net_device *dev = priv->netdev;
+ u16 new_index;
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!test_bit(XVE_FLAG_INITIALIZED, &priv->flags) ||
+ !test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+ xve_debug(DEBUG_IBDEV_INFO, priv,
+ "%s Not flushing - XVE_FLAG_ADMIN_UP/XVE_FLAG_INITIALIZED not set, flags %lx\n",
+ __func__, priv->flags);
+ goto out;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (level == XVE_FLUSH_HEAVY) {
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey,
+ &new_index)) {
+ clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+ xve_ib_dev_down(dev, 0);
+ xve_ib_dev_stop(dev, 0);
+ if (xve_pkey_dev_delay_open(dev))
+ return;
+ }
+
+ /* restart QP only if P_Key index is changed */
+ if (test_and_set_bit(XVE_PKEY_ASSIGNED, &priv->flags) &&
+ new_index == priv->pkey_index) {
+ xve_debug(DEBUG_IBDEV_INFO, priv,
+ "%s PKey index not changed\n", __func__);
+ return;
+ }
+ priv->pkey_index = new_index;
+ }
+
+ if (level == XVE_FLUSH_LIGHT) {
+ xve_mark_paths_invalid(dev);
+ xve_mcast_dev_flush(dev);
+ clear_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+ }
+
+ if (level >= XVE_FLUSH_NORMAL)
+ xve_ib_dev_down(dev, 0);
+
+ if (level == XVE_FLUSH_HEAVY) {
+ xve_ib_dev_stop(dev, 0);
+ xve_ib_dev_open(dev);
+ }
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_FLAG_IB_EVENT, &priv->flags);
+out:
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void xve_ib_dev_flush_light(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHLIGHT, 0);
+
+ __xve_ib_dev_flush(priv, XVE_FLUSH_LIGHT);
+ xve_put_ctx(priv);
+}
+
+void xve_ib_dev_flush_normal(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHNORMAL, 0);
+
+ __xve_ib_dev_flush(priv, XVE_FLUSH_NORMAL);
+ xve_put_ctx(priv);
+}
+
+void xve_ib_dev_flush_heavy(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHHEAVY, 0);
+
+ __xve_ib_dev_flush(priv, XVE_FLUSH_HEAVY);
+ xve_put_ctx(priv);
+}
+
+void xve_ib_dev_cleanup(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_debug(DEBUG_IBDEV_INFO, priv, "%s Cleaning up ib_dev\n", __func__);
+
+ xve_mcast_stop_thread(dev, 1);
+ xve_mcast_dev_flush(dev);
+ xve_ah_dev_cleanup(dev);
+ xve_transport_dev_cleanup(dev);
+}
+
+/*
+ * Delayed P_Key Assignment Interim Support
+ *
+ * The following is initial implementation of delayed P_Key assignment
+ * mechanism. It is using the same approach implemented for the multicast
+ * group join. The single goal of this implementation is to quickly address
+ * Bug #2507. This implementation will probably be removed when the P_Key
+ * change async notification is available.
+ */
+
+void xve_pkey_poll(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_PKEYPOLL, 1);
+ struct net_device *dev = priv->netdev;
+
+ xve_pkey_dev_check_presence(dev);
+
+ if (test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+ xve_open(dev);
+ else {
+ mutex_lock(&pkey_mutex);
+ if (!test_bit(XVE_PKEY_STOP, &priv->flags))
+ xve_queue_dwork(priv, XVE_WQ_START_PKEYPOLL, HZ);
+ mutex_unlock(&pkey_mutex);
+ }
+ xve_put_ctx(priv);
+}
+
+int xve_pkey_dev_delay_open(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ /*
+ * Look for the interface pkey value in the IB port P_Key table
+ * and set the interface pkey assignment flag.
+ */
+ xve_pkey_dev_check_presence(dev);
+
+ /* P_Key value not assigned yet - start polling */
+ if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+ mutex_lock(&pkey_mutex);
+ clear_bit(XVE_PKEY_STOP, &priv->flags);
+ xve_queue_dwork(priv, XVE_WQ_START_PKEYPOLL, HZ);
+ mutex_unlock(&pkey_mutex);
+ return 1;
+ }
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2011 Xsigo Systems. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define DRIVER_VERSION "0.31"
+#else
+#define DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+static int xve_xsmp_service_id = -1;
+struct mutex xve_mutex;
+static spinlock_t xve_lock;
+u32 xve_counters[XVE_MAX_GLOB_COUNTERS];
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN Virtual Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+int xve_sendq_size __read_mostly = XVE_TX_RING_SIZE;
+int xve_recvq_size __read_mostly = XVE_RX_RING_SIZE;
+module_param_named(send_queue_size, xve_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, xve_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
+static int napi_weight = 128;
+module_param(napi_weight, int, 0644);
+
+static int xve_no_tx_checksum_offload;
+module_param(xve_no_tx_checksum_offload, int, 0644);
+
+int lro;
+module_param(lro, bool, 0444);
+MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
+
+static int lro_max_aggr = XVE_LRO_MAX_AGGR;
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro_max_aggr,
+ "LRO: Max packets to be aggregated (default = 64)");
+
+static int xve_hbeat_enable;
+module_param(xve_hbeat_enable, int, 0644);
+MODULE_PARM_DESC(xve_hbeat_enable, "Enable/Disable heartbeat");
+
+static int xve_aging_timeout = 5 * 60;
+module_param(xve_aging_timeout, int, 0644);
+MODULE_PARM_DESC(xve_aging_timeout, "Aging timeout in seconds");
+
+static int xve_flood_rc = 1;
+module_param(xve_flood_rc, int, 0644);
+MODULE_PARM_DESC(xve_flood_rc, "Enable/Disable flood mode for RC");
+
+int xve_debug_level;
+module_param_named(xve_debug_level, xve_debug_level, int, 0644);
+MODULE_PARM_DESC(xve_debug_level, "Enable debug tracing");
+
+int xve_cm_single_qp;
+module_param_named(xve_cm_single_qp, xve_cm_single_qp, int, 0644);
+
+int xve_mc_sendonly_timeout;
+module_param_named(mc_sendonly_timeout, xve_mc_sendonly_timeout, int, 0644);
+MODULE_PARM_DESC(mc_sendonly_timeout, "Multicast send-only timeout");
+
+int xve_do_arp = 1;
+module_param_named(do_arp, xve_do_arp, int, 0644);
+MODULE_PARM_DESC(do_arp, "Enable/Disable ARP for NIC MTU less than IB-MTU");
+
+static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+ int len);
+
+struct xve_path_iter {
+ struct net_device *dev;
+ struct xve_path path;
+};
+
+static const u8 bcast_mgid[] = {
+ 0xff, 0x12, 0x40, 0x1c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+};
+
+static u8 ipv6_dmac_addr[] = {
+ 0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00
+};
+
+struct workqueue_struct *xve_workqueue;
+struct workqueue_struct *xve_taskqueue;
+
+struct ib_sa_client xve_sa_client;
+
+struct list_head xve_dev_list;
+
+static inline int xve_esx_preregister_setup(struct net_device *netdev)
+{
+ xg_preregister_pseudo_device(netdev);
+ return 0;
+}
+
+static inline int xve_esx_postregister_setup(struct net_device *netdev)
+{
+ return 0;
+}
+
+static inline void vmk_notify_uplink(struct net_device *netdev)
+{
+
+}
+
+int xve_open(struct net_device *netdev)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ unsigned long flags = 0;
+
+ pr_info("XVE: %s Bringing interface up %s\n", __func__, priv->xve_name);
+ priv->counters[XVE_OPEN_COUNTER]++;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+ set_bit(XVE_OPER_UP, &priv->state);
+ set_bit(XVE_OS_ADMIN_UP, &priv->state);
+ priv->port_speed = xve_calc_speed(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (xve_pkey_dev_delay_open(netdev))
+ return 0;
+
+ if (xve_ib_dev_open(netdev))
+ goto err_disable;
+
+ if (xve_ib_dev_up(netdev))
+ goto err_stop;
+
+ queue_age_work(priv, 0);
+
+ return 0;
+
+err_stop:
+ xve_ib_dev_stop(netdev, 1);
+
+err_disable:
+ clear_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+
+ return -EINVAL;
+}
+
+static int xve_stop(struct net_device *netdev)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ unsigned long flags = 0;
+
+ pr_info("XVE: %s Stopping interface %s\n", __func__, priv->xve_name);
+
+ spin_lock_irqsave(&priv->lock, flags);
+ clear_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+ clear_bit(XVE_OPER_UP, &priv->state);
+ clear_bit(XVE_OS_ADMIN_UP, &priv->state);
+ handle_carrier_state(priv, 0);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ xve_ib_dev_down(netdev, 0);
+ xve_ib_dev_stop(netdev, 0);
+
+ pr_info("XVE: %s Finished Stopping interface %s\n", __func__,
+ priv->xve_name);
+ return 0;
+}
+
+int xve_modify_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+
+ pr_info("XVE: %s changing mtu from %d to %d\n",
+ priv->xve_name, priv->admin_mtu, new_mtu);
+ if (new_mtu == netdev->mtu)
+ return 0;
+
+ /* dev->mtu > 2K ==> connected mode */
+ if (xve_cm_admin_enabled(netdev)) {
+ if (new_mtu > xve_cm_max_mtu(netdev))
+ return -EINVAL;
+
+ netdev->mtu = new_mtu;
+ return 0;
+ }
+
+ if (new_mtu > XVE_UD_MTU(priv->max_ib_mtu))
+ return -EINVAL;
+
+ priv->admin_mtu = new_mtu;
+ netdev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+ xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+ (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl, priv->resource_id);
+
+ return 0;
+}
+
+static int xve_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ return xve_modify_mtu(netdev, new_mtu);
+}
+
+static int xve_set_mac_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *addr = p;
+
+ if (!is_valid_ether_addr(addr->sa_data))
+ return -EINVAL;
+ memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+ return 0;
+}
+
+static struct net_device_stats *xve_get_stats(struct net_device *netdev)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+
+ priv->counters[XVE_GETSTATS_COUNTER]++;
+ return &priv->netdev->stats;
+}
+
+static int xve_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ struct mii_ioctl_data *data = if_mii(ifr);
+ int ret = 0;
+ struct xve_dev_priv *priv;
+
+ if (!netdev || !netif_running(netdev))
+ return -EAGAIN;
+
+ priv = netdev_priv(netdev);
+ priv->counters[XVE_IOCTL_COUNTER]++;
+
+ switch (cmd) {
+ case SIOCGMIIPHY:
+ data->phy_id = 5;
+ break;
+ case SIOCGMIIREG:
+ /*
+ * Mainly used by mii monitor
+ */
+ switch (data->reg_num) {
+ case 0:
+ data->val_out = 0x2100;
+ break;
+ case 1:
+ data->val_out = 0xfe00 |
+ (netif_carrier_ok(netdev) << 2);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+ return ret;
+}
+
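+/*
+ * Cached paths are kept in an rb-tree (priv->path_tree) keyed by the
+ * destination GID in memcmp order, with a parallel list
+ * (priv->path_list) used for iteration and teardown.
+ */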
+struct xve_path *__path_find(struct net_device *netdev, void *gid)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ struct rb_node *n = priv->path_tree.rb_node;
+ struct xve_path *path;
+ int ret;
+
+ while (n) {
+ path = rb_entry(n, struct xve_path, rb_node);
+
+ ret = memcmp(gid, path->pathrec.dgid.raw, sizeof(union ib_gid));
+
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ return path;
+ }
+
+ return NULL;
+}
+
+static int __path_add(struct net_device *netdev, struct xve_path *path)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ struct rb_node **n = &priv->path_tree.rb_node;
+ struct rb_node *pn = NULL;
+ struct xve_path *tpath;
+ int ret;
+
+ while (*n) {
+ pn = *n;
+ tpath = rb_entry(pn, struct xve_path, rb_node);
+
+ ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = &pn->rb_left;
+ else if (ret > 0)
+ n = &pn->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ rb_link_node(&path->rb_node, pn, n);
+ rb_insert_color(&path->rb_node, &priv->path_tree);
+
+ list_add_tail(&path->list, &priv->path_list);
+
+ return 0;
+}
+
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
+ int do_lock)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ struct xve_fwt_entry *fwt_entry, *tn;
+ unsigned long flags = 0;
+
+ if (do_lock)
+ spin_lock_irqsave(&priv->lock, flags);
+
+ list_for_each_entry_safe(fwt_entry, tn, &path->fwt_list, list)
+ xve_fwt_entry_destroy(priv, fwt_entry);
+
+ if (do_lock)
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void path_free(struct net_device *netdev, struct xve_path *path)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ while ((skb = __skb_dequeue(&path->queue)))
+ dev_kfree_skb_irq(skb);
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (xve_cmtx_get(path)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
+ spin_lock_irqsave(&priv->lock, flags);
+ }
+ xve_flush_l2_entries(netdev, path, 0);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (path->ah)
+ xve_put_ah(path->ah);
+ kfree(path);
+}
+
+/*
+ * Called with priv->lock held
+ */
+static void xve_flood_all_paths(struct net_device *dev, struct sk_buff *skb)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_path *path;
+ struct sk_buff *nskb;
+
+ list_for_each_entry(path, &priv->path_list, list) {
+ if (xve_cmtx_get(path) && xve_cm_up(path)) {
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (nskb)
+ xve_cm_send(dev, nskb, xve_cmtx_get(path));
+ }
+ }
+}
+
+void xve_mark_paths_invalid(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_path *path, *tp;
+
+ spin_lock_irq(&priv->lock);
+
+ list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+ xve_debug(DEBUG_IBDEV_INFO, priv,
+ "%s mark path LID 0x%04x GID %pI6 invalid\n",
+ __func__, be16_to_cpu(path->pathrec.dlid),
+ path->pathrec.dgid.raw);
+ path->valid = 0;
+ }
+
+ spin_unlock_irq(&priv->lock);
+}
+
+void xve_flush_paths(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_path *path, *tp;
+
+ list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+ xve_flush_single_path(dev, path);
+ }
+}
+
+void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ unsigned long flags = 0;
+ struct xve_path *path;
+
+ netif_tx_lock_bh(dev);
+ spin_lock_irqsave(&priv->lock, flags);
+
+ path = __path_find(dev, gid->raw);
+ if (!path) {
+ char *mgid_token = gid->raw;
+ char tmp_buf[64];
+
+ xve_debug(DEBUG_FLUSH_INFO, priv, "%s Path not found\n",
+ __func__);
+ print_mgid_buf(tmp_buf, mgid_token);
+ xve_debug(DEBUG_FLUSH_INFO, priv, "%s MGID %s\n",
+ __func__, tmp_buf);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+ return;
+ }
+
+ xve_debug(DEBUG_FLUSH_INFO, priv, "%s Flushing the path %p\n",
+ __func__, path);
+ rb_erase(&path->rb_node, &priv->path_tree);
+ if (path->query)
+ ib_sa_cancel_query(path->query_id, path->query);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ netif_tx_unlock_bh(dev);
+
+ wait_for_completion(&path->done);
+ list_del(&path->list);
+ path_free(dev, path);
+}
+
+void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
+{
+ xve_flush_single_path_by_gid(dev, &path->pathrec.dgid);
+}
+
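+/*
+ * Completion handler for the SA path record query started by
+ * path_rec_start(). On success a fresh address handle is created (and
+ * a CM TX connection in connected mode), the old handle is released,
+ * and any packets queued on the path while the lookup was in flight
+ * are re-submitted through dev_queue_xmit().
+ */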
+static void path_rec_completion(int status,
+ struct ib_sa_path_rec *pathrec, void *path_ptr)
+{
+ struct xve_path *path = path_ptr;
+ struct net_device *dev = path->dev;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_ah *ah = NULL;
+ struct xve_ah *old_ah = NULL;
+ struct sk_buff_head skqueue;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ if (!status) {
+ priv->counters[XVE_PATHREC_RESP_COUNTER]++;
+ xve_test("XVE: %s PathRec LID 0x%04x for GID %pI6\n",
+ __func__, be16_to_cpu(pathrec->dlid),
+ pathrec->dgid.raw);
+ } else {
+ priv->counters[XVE_PATHREC_RESP_ERR_COUNTER]++;
+ xve_test("XVE: %s PathRec status %d for GID %pI6\n",
+ __func__, status, path->pathrec.dgid.raw);
+ }
+
+ skb_queue_head_init(&skqueue);
+
+ if (!status) {
+ struct ib_ah_attr av;
+
+ if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) {
+ av.ah_flags = IB_AH_GRH;
+ ah = xve_create_ah(dev, priv->pd, &av);
+ }
+ }
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ if (ah) {
+ path->pathrec = *pathrec;
+ old_ah = path->ah;
+ path->ah = ah;
+
+ xve_test
+ ("XVE: %screated address handle %p for LID 0x%04x, SL %d\n",
+ __func__, ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+ if (xve_cm_enabled(dev)) {
+ if (!xve_cmtx_get(path))
+ xve_cm_create_tx(dev, path);
+ }
+
+ while ((skb = __skb_dequeue(&path->queue)))
+ __skb_queue_tail(&skqueue, skb);
+ path->valid = 1;
+ }
+
+ path->query = NULL;
+ complete(&path->done);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (old_ah)
+ xve_put_ah(old_ah);
+
+ while ((skb = __skb_dequeue(&skqueue))) {
+ skb->dev = dev;
+ if (dev_queue_xmit(skb)) {
+ xve_warn(priv,
+ "dev_queue_xmit failed to requeue pkt for %s\n",
+ priv->xve_name);
+ } else {
+ xve_test("%s Succefully completed path for %s\n",
+ __func__, priv->xve_name);
+ }
+ }
+}
+
+static struct xve_path *path_rec_create(struct net_device *dev, void *gid)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_path *path;
+
+ if (!priv->broadcast)
+ return NULL;
+
+ path = kzalloc(sizeof(*path), GFP_ATOMIC);
+ if (!path)
+ return NULL;
+
+ path->dev = dev;
+
+ skb_queue_head_init(&path->queue);
+
+ INIT_LIST_HEAD(&path->fwt_list);
+
+ memcpy(path->pathrec.dgid.raw, gid, sizeof(union ib_gid));
+ path->pathrec.sgid = priv->local_gid;
+ path->pathrec.pkey = cpu_to_be16(priv->pkey);
+ path->pathrec.numb_path = 1;
+ path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
+
+ return path;
+}
+
+static int path_rec_start(struct net_device *dev, struct xve_path *path)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ ib_sa_comp_mask comp_mask =
+ IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
+ struct ib_sa_path_rec p_rec;
+
+ p_rec = path->pathrec;
+ p_rec.mtu_selector = IB_SA_GT;
+
+ switch (roundup_pow_of_two(dev->mtu + VLAN_ETH_HLEN)) {
+ case 512:
+ p_rec.mtu = IB_MTU_256;
+ break;
+ case 1024:
+ p_rec.mtu = IB_MTU_512;
+ break;
+ case 2048:
+ p_rec.mtu = IB_MTU_1024;
+ break;
+ default:
+ /* Wildcard everything */
+ comp_mask = 0;
+ p_rec.mtu = 0;
+ p_rec.mtu_selector = 0;
+ }
+
+ xve_dbg_mcast(priv, "%s Start path record lookup for %pI6 MTU > %d\n",
+ __func__, p_rec.dgid.raw,
+ comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);
+
+ init_completion(&path->done);
+
+ path->query_id =
+ ib_sa_path_rec_get(&xve_sa_client, priv->ca, priv->port,
+ &p_rec, comp_mask |
+ IB_SA_PATH_REC_DGID |
+ IB_SA_PATH_REC_SGID |
+ IB_SA_PATH_REC_NUMB_PATH |
+ IB_SA_PATH_REC_TRAFFIC_CLASS |
+ IB_SA_PATH_REC_PKEY,
+ 1000, GFP_ATOMIC,
+ path_rec_completion, path, &path->query);
+ if (path->query_id < 0) {
+ xve_warn(priv, "ib_sa_path_rec_get failed: %d for %s\n",
+ path->query_id, priv->xve_name);
+ path->query = NULL;
+ complete(&path->done);
+ return path->query_id;
+ }
+ priv->counters[XVE_PATHREC_QUERY_COUNTER]++;
+ return 0;
+}
+
+static void xve_path_lookup(struct sk_buff *skb, struct net_device *dev,
+ struct xve_fwt_entry *fwt_entry, int *ok)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ struct xve_path *path;
+ unsigned long flags = 0;
+
+ path = __path_find(dev, fwt_entry->dgid.raw);
+ if (!path) {
+ xve_debug(DEBUG_TABLE_INFO, priv, "%s Unable to find path\n",
+ __func__);
+ path = path_rec_create(dev, fwt_entry->dgid.raw);
+ if (!path)
+ goto err_drop;
+ __path_add(dev, path);
+ }
+
+ xve_debug(DEBUG_TABLE_INFO, priv, "%s Adding FWT to list %p\n",
+ __func__, fwt_entry);
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ fwt_entry->path = path;
+ list_add_tail(&fwt_entry->list, &path->fwt_list);
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+ if (!path->ah) {
+ if (!path->query && path_rec_start(dev, path))
+ goto err_drop;
+ }
+ *ok = 1;
+ return;
+err_drop:
+ *ok = 0;
+}
+
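+/*
+ * Transmit entry point, run under priv->lock. The destination MAC
+ * (plus VLAN tag, if any) is looked up in the L2 forwarding table:
+ * multicast frames go out on the broadcast MGID; unknown unicast
+ * frames trigger an ARP/NDP query and are flooded (UD multicast for
+ * frames within the UD MTU, all connected paths when xve_flood_rc is
+ * set); known destinations are sent over the CM connection when it is
+ * up, over UD once the address handle is resolved, or queued while
+ * the path record lookup is still outstanding.
+ */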
+static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct sk_buff *bcast_skb = NULL;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_fwt_entry *fwt_entry = NULL;
+ struct xve_path *path;
+ unsigned long flags;
+ int ret = NETDEV_TX_OK, len = 0;
+ char *smac;
+ u8 skb_need_tofree = 0, inc_drop_cnt = 0, queued_pkt = 0;
+ u16 vlan_tag = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!test_bit(XVE_OPER_UP, &priv->state)) {
+ ret = NETDEV_TX_BUSY;
+ inc_drop_cnt = 1;
+ priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
+ goto unlock;
+ }
+
+ if (skb->len < XVE_MIN_PACKET_LEN) {
+ priv->counters[XVE_SHORT_PKT_COUNTER]++;
+ if (skb_padto(skb, XVE_MIN_PACKET_LEN)) {
+ inc_drop_cnt = 1;
+ priv->counters[XVE_TX_SKB_ALLOC_ERROR_COUNTER]++;
+ ret = NETDEV_TX_BUSY;
+ goto unlock;
+ }
+ skb->len = XVE_MIN_PACKET_LEN;
+ }
+
+ len = skb->len;
+ smac = skb->data + ETH_ALEN;
+
+ if (xg_vlan_tx_tag_present(skb))
+ vlan_get_tag(skb, &vlan_tag);
+
+ fwt_entry = xve_fwt_lookup(&priv->xve_fwt, skb->data, vlan_tag, 0);
+ if (!fwt_entry) {
+ if (is_multicast_ether_addr(skb->data)) {
+ xve_mcast_send(dev, (void *)priv->bcast_mgid.raw, skb);
+ priv->counters[XVE_TX_MCAST_PKT]++;
+ goto stats;
+ } else {
+ /*
+ * XXX Viswa Need to change this
+ * Since this is a unicast packet and we do not have
+ * an L2 table entry, we need to do the following:
+ * if the packet is less than the IB MTU, flood it;
+ * if it is more than the IB MTU, we need to send it
+ * to all ports.
+ * We still generate ARP and NDP for IPv4 and IPv6
+ * respectively.
+ */
+
+ /*
+ * Do not ARP if the user has disabled it and the
+ * NIC MTU is less than the IB MTU.
+ */
+ if (xve_do_arp
+ || (priv->netdev->mtu >
+ XVE_UD_MTU(priv->max_ib_mtu)))
+ bcast_skb = xve_generate_query(priv, skb);
+
+ if (bcast_skb != NULL)
+ xve_mcast_send(dev,
+ (void *)priv->bcast_mgid.raw,
+ bcast_skb);
+ /*
+ * Now send the original packet also to over broadcast
+ * Later add counters for flood mode
+ */
+ if (len < XVE_UD_MTU(priv->max_ib_mtu)) {
+ xve_mcast_send(dev,
+ (void *)priv->bcast_mgid.raw,
+ skb);
+ priv->counters[XVE_TX_MCAST_FLOOD_UD]++;
+ } else {
+ if (xve_flood_rc) {
+ xve_flood_all_paths(dev, skb);
+ priv->counters[XVE_TX_MCAST_FLOOD_RC]++;
+ /*
+ * Free the original skb
+ */
+ skb_need_tofree = 1;
+ } else {
+ skb_need_tofree = 1;
+ goto unlock;
+ }
+ }
+ goto stats;
+ }
+ }
+
+ if (!fwt_entry->path) {
+ int ok;
+
+ priv->counters[XVE_PATH_NOT_FOUND]++;
+ xve_debug(DEBUG_SEND_INFO, priv,
+ "%s Unable to find neigbour doing a path lookup\n",
+ __func__);
+ xve_path_lookup(skb, dev, fwt_entry, &ok);
+ if (!ok) {
+ skb_need_tofree = inc_drop_cnt = 1;
+ goto free_fwt_ctx;
+ }
+ } else {
+ path = fwt_entry->path;
+ if (!path->ah) {
+ priv->counters[XVE_AH_NOT_FOUND]++;
+ xve_debug(DEBUG_SEND_INFO, priv,
+ "%s Path present %p no ah\n", __func__,
+ fwt_entry->path);
+ if (!path->query && path_rec_start(dev, path)) {
+ skb_need_tofree = inc_drop_cnt = 1;
+ goto free_fwt_ctx;
+ }
+ }
+ }
+
+ path = fwt_entry->path;
+
+ if (xve_cmtx_get(path)) {
+ if (xve_cm_up(path)) {
+ xve_cm_send(dev, skb, xve_cmtx_get(path));
+ update_cm_tx_rate(xve_cmtx_get(path), len);
+ priv->counters[XVE_TX_RC_COUNTER]++;
+ goto stats;
+ }
+ } else if (path->ah) {
+ xve_debug(DEBUG_SEND_INFO, priv, "%s path ah is %p\n",
+ __func__, path->ah);
+ xve_send(dev, skb, path->ah, fwt_entry->dqpn);
+ priv->counters[XVE_TX_UD_COUNTER]++;
+ goto stats;
+ }
+
+ if (skb_queue_len(&path->queue) < XVE_MAX_PATH_REC_QUEUE) {
+ priv->counters[XVE_TX_QUEUE_PKT]++;
+ __skb_queue_tail(&path->queue, skb);
+ queued_pkt = 1;
+ } else {
+ xve_debug(DEBUG_SEND_INFO, priv,
+ "%s Dropping packets path %p fwt_entry %p\n",
+ __func__, path, fwt_entry);
+ skb_need_tofree = inc_drop_cnt = 1;
+ goto free_fwt_ctx;
+ }
+stats:
+ INC_TX_PKT_STATS(priv, dev);
+ INC_TX_BYTE_STATS(priv, dev, len);
+ priv->counters[XVE_TX_COUNTER]++;
+free_fwt_ctx:
+ xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
+unlock:
+ if (inc_drop_cnt)
+ INC_TX_DROP_STATS(priv, dev);
+
+ if (!queued_pkt)
+ dev->trans_start = jiffies;
+ if (skb_need_tofree)
+ dev_kfree_skb(skb);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return ret;
+}
+
+static void xve_timeout(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_warn(priv, "transmit timeout: latency %d msecs\n",
+ jiffies_to_msecs(jiffies - dev->trans_start));
+ xve_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+ netif_queue_stopped(dev), priv->tx_head, priv->tx_tail);
+ priv->counters[XVE_WDOG_TIMEOUT_COUNTER]++;
+}
+
+static void xve_set_mcast_list(struct net_device *dev)
+{
+}
+
+int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ /* Allocate RX/TX "rings" to hold queued skbs */
+ priv->rx_ring = kcalloc(xve_recvq_size, sizeof(*priv->rx_ring),
+ GFP_KERNEL);
+ if (!priv->rx_ring) {
+ pr_warn("%s: failed to allocate RX ring (%d entries)\n",
+ ca->name, xve_recvq_size);
+ goto out;
+ }
+
+ priv->tx_ring = vzalloc(xve_sendq_size * sizeof(*priv->tx_ring));
+ if (!priv->tx_ring) {
+ pr_warn("%s: failed to allocate TX ring (%d entries)\n",
+ ca->name, xve_sendq_size);
+ goto out_rx_ring_cleanup;
+ }
+
+ /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
+
+ if (xve_ib_dev_init(dev, ca, port) != 0) {
+ pr_err("%s Failed for %s\n", __func__, priv->xve_name);
+ goto out_tx_ring_cleanup;
+ }
+
+ return 0;
+
+out_tx_ring_cleanup:
+ vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+ kfree(priv->rx_ring);
+
+out:
+ return -ENOMEM;
+}
+
+void xve_dev_cleanup(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_remove_proc_entry(priv);
+ xve_ib_dev_cleanup(dev);
+
+ kfree(priv->rx_ring);
+ vfree(priv->tx_ring);
+
+ priv->rx_ring = NULL;
+ priv->tx_ring = NULL;
+
+ xve_fwt_cleanup(priv);
+}
+
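+/*
+ * LRO callback: accept only frames carrying a complete TCP/IPv4
+ * header and hand the IP and TCP header pointers back to the LRO
+ * manager for aggregation.
+ */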
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+ void **tcph, u64 *hdr_flags, void *priv)
+{
+ unsigned int ip_len;
+ struct iphdr *iph;
+
+ if (unlikely(skb->protocol != htons(ETH_P_IP)))
+ return -1;
+
+ /* Check for non-TCP packet */
+ skb_reset_network_header(skb);
+ iph = ip_hdr(skb);
+ if (iph->protocol != IPPROTO_TCP)
+ return -1;
+
+ ip_len = ip_hdrlen(skb);
+ skb_set_transport_header(skb, ip_len);
+ *tcph = tcp_hdr(skb);
+
+ /* check if IP header and TCP header are complete */
+ if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+ return -1;
+
+ *hdr_flags = LRO_IPV4 | LRO_TCP;
+ *iphdr = iph;
+
+ return 0;
+}
+
+static void xve_lro_setup(struct xve_dev_priv *priv)
+{
+ priv->lro.lro_mgr.max_aggr = lro_max_aggr;
+ priv->lro.lro_mgr.max_desc = XVE_MAX_LRO_DESCRIPTORS;
+ priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
+ priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+ priv->lro.lro_mgr.features = LRO_F_NAPI;
+ priv->lro.lro_mgr.dev = priv->netdev;
+ priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
+static const struct net_device_ops xve_netdev_ops = {
+ .ndo_open = xve_open,
+ .ndo_stop = xve_stop,
+ .ndo_change_mtu = xve_change_mtu,
+ .ndo_set_mac_address = xve_set_mac_address,
+ .ndo_start_xmit = xve_start_xmit,
+ .ndo_tx_timeout = xve_timeout,
+ .ndo_set_rx_mode = xve_set_mcast_list,
+ .ndo_do_ioctl = xve_ioctl,
+ .ndo_get_stats = xve_get_stats,
+};
+
+static void xve_set_oper_down(struct xve_dev_priv *priv)
+{
+ if (test_and_clear_bit(XVE_OPER_UP, &priv->state)) {
+ handle_carrier_state(priv, 0);
+ clear_bit(XVE_OPER_REP_SENT, &priv->state);
+ clear_bit(XVE_PORT_LINK_UP, &priv->state);
+ clear_bit(XVE_OPER_UP, &priv->state);
+ xve_xsmp_send_oper_state(priv, priv->resource_id,
+ XSMP_XVE_OPER_DOWN);
+ }
+}
+
+static void xve_io_disconnect(struct xve_dev_priv *priv)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (test_bit(XVE_OPER_UP, &priv->state)) {
+ xve_set_oper_down(priv);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ if (test_bit(XVE_OS_ADMIN_UP, &priv->state))
+ napi_synchronize(&priv->napi);
+ pr_info("%s Flushing mcast [xve :%s]\n", __func__,
+ priv->xve_name);
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ } else {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+}
+
+void handle_carrier_state(struct xve_dev_priv *priv, char state)
+{
+ if (state) {
+ priv->jiffies = jiffies;
+ priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+ netif_carrier_on(priv->netdev);
+ netif_wake_queue(priv->netdev);
+ /* careful: we are holding the lock (priv->lock) inside this */
+ xve_data_recv_handler(priv);
+ } else {
+ netif_carrier_off(priv->netdev);
+ netif_stop_queue(priv->netdev);
+ priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+ }
+}
+
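+/*
+ * Build the query used to resolve an unknown unicast destination: an
+ * ARP request for IPv4 frames and a Neighbor Solicitation for IPv6
+ * frames (VLAN-encapsulated frames included); other protocols get no
+ * query.
+ */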
+struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
+ struct sk_buff *skb)
+{
+ struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+ if ((xg_vlan_tx_tag_present(skb)
+ && veth->h_vlan_encapsulated_proto == htons(ETH_P_IP))
+ || skb->protocol == htons(ETH_P_IP))
+ return xve_create_arp(priv, skb);
+ if ((xg_vlan_tx_tag_present(skb)
+ && veth->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))
+ || skb->protocol == htons(ETH_P_IPV6))
+ return xve_create_ndp(priv, skb);
+
+ return NULL;
+}
+
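+/*
+ * Build a broadcast ARP request on behalf of the sender, reusing the
+ * source MAC/IP and the destination IP of the original packet and
+ * preserving the VLAN tag when present.
+ */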
+struct sk_buff *xve_create_arp(struct xve_dev_priv *priv,
+ struct sk_buff *skb_pkt)
+{
+ struct sk_buff *skb;
+ struct arphdr *arp;
+ struct iphdr *iphdr;
+ unsigned char *arp_ptr, *eth_ptr;
+ struct net_device *netdev = priv->netdev;
+
+ skb = alloc_skb(XVE_MIN_PACKET_LEN, GFP_ATOMIC);
+ if (skb == NULL)
+ return NULL;
+
+ eth_ptr = (unsigned char *)skb_put(skb, XVE_MIN_PACKET_LEN);
+ /*
+ * Broadcast packet
+ */
+ memset(eth_ptr, 0xFF, ETH_ALEN);
+ eth_ptr += ETH_ALEN;
+ /*
+ * Copy the source MAC
+ */
+ memcpy(eth_ptr, skb_pkt->data + ETH_ALEN, ETH_ALEN);
+
+ eth_ptr += ETH_ALEN;
+
+ if (xg_vlan_tx_tag_present(skb_pkt)) {
+ u16 vlan_tci = 0;
+ struct vlan_ethhdr *veth;
+
+ vlan_get_tag(skb_pkt, &vlan_tci);
+ veth = (struct vlan_ethhdr *)(skb->data);
+ veth->h_vlan_proto = htons(ETH_P_8021Q);
+ /* now, the TCI */
+ veth->h_vlan_TCI = htons(vlan_tci);
+ eth_ptr += VLAN_HLEN;
+ priv->counters[XVE_TX_MCAST_ARP_VLAN_QUERY]++;
+ }
+
+ *eth_ptr++ = (ETH_P_ARP >> 8) & 0xff;
+ *eth_ptr++ = ETH_P_ARP & 0xff;
+
+ arp = (struct arphdr *)eth_ptr;
+ arp->ar_hrd = htons(netdev->type);
+ arp->ar_pro = htons(ETH_P_IP);
+ arp->ar_hln = netdev->addr_len;
+ arp->ar_pln = 4;
+ arp->ar_op = htons(ARPOP_REQUEST);
+
+ iphdr = (struct iphdr *)ip_hdr(skb_pkt);
+ arp_ptr = (unsigned char *)(arp + 1);
+
+ ether_addr_copy(arp_ptr, skb_pkt->data + ETH_ALEN);
+ arp_ptr += netdev->addr_len;
+ memcpy(arp_ptr, &iphdr->saddr, 4);
+ arp_ptr += 4;
+ ether_addr_copy(arp_ptr, skb_pkt->data);
+ arp_ptr += netdev->addr_len;
+ memcpy(arp_ptr, &iphdr->daddr, 4);
+
+ skb_reset_network_header(skb);
+ skb->dev = netdev;
+ skb->protocol = htons(ETH_P_ARP);
+ priv->counters[XVE_TX_MCAST_ARP_QUERY]++;
+ return skb;
+}
+
+/**
+ * Function: xve_create_ndp()
+ * Param: priv - private structure
+ * skb_pkt - skb buff from stack
+ * Description: generates an NDP packet (the IPv6 equivalent of ARP).
+ * This function generates a Neighbor Solicitation
+ * packet to discover the link layer address of
+ * an on-link ipv6 node or to confirm a previously
+ * determined link layer address.
+ *
+ * The NDP packet constructed follows the packet format as:
+ * Ethernet Header
+ *-----------------------------
+ * - destination mac 6 bytes
+ * - source mac 6 bytes
+ * - type ipv6 (0x86dd) 2 bytes
+ * IPV6 Header
+ *-----------------------------
+ * - Version 4 bits
+ * - traffic class 4 bits
+ * - flow label 3 bytes
+ * - payload length 3 bytes
+ * - next header 1 byte
+ * - hop limit 1 byte
+ * - source ip addr 16 bytes
+ * - destination ip addr 16 bytes
+ * ICMPv6 Header
+ *----------------------------
+ * - type 1 byte
+ * - code 1 byte
+ * - checksum 2 bytes
+ * - reserved 4 bytes
+ * - target ip addr 16 bytes
+ * ICMPv6 Optional Header
+ *----------------------------
+ * - type 1 byte
+ * - length 1 byte
+ * - source mac addr 6 bytes
+ *-------------------------------------------------------------
+ * TOTAL 86 bytes
+ **/
+
+struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv,
+ struct sk_buff *skb_pkt)
+{
+ struct sk_buff *skb;
+ struct net_device *netdev = priv->netdev;
+ struct ipv6hdr *ipv6_hdr, *ipv6_hdr_tmp;
+ struct icmp6_ndp *icmp_ndp_hdr;
+ unsigned char *hdr_ptr;
+ unsigned char source_addr[16];
+ unsigned char dest_addr[16];
+ int count; /* keep track of skb_pkt->data */
+
+ count = 0;
+ skb = alloc_skb(XVE_IPV6_MIN_PACK_LEN, GFP_ATOMIC);
+ if (skb == NULL)
+ return NULL;
+
+ /* get the ipv6hdr from skb_pkt */
+ if (xg_vlan_tx_tag_present(skb_pkt))
+ ipv6_hdr_tmp =
+ (struct ipv6hdr *)(skb_pkt->data + ETH_HDR_LEN + VLAN_HLEN);
+ else
+ ipv6_hdr_tmp = (struct ipv6hdr *)(skb_pkt->data + ETH_HDR_LEN);
+
+ /* get local copy of source and destination ip address */
+ memcpy(source_addr, ipv6_hdr_tmp->saddr.s6_addr, IPV6_ADDR_LEN);
+ memcpy(dest_addr, ipv6_hdr_tmp->daddr.s6_addr, IPV6_ADDR_LEN);
+
+ /* initialise the memory allocated */
+ memset(skb->data, 0, XVE_IPV6_MIN_PACK_LEN);
+ /* create space for data in skb buffer */
+ hdr_ptr = (unsigned char *)skb_put(skb, XVE_IPV6_MIN_PACK_LEN);
+
+ /* construct destination mac address (multicast address) */
+ hdr_ptr[0] = PREFIX_MULTI_ADDR;
+ hdr_ptr[1] = PREFIX_MULTI_ADDR;
+ /* get the last 4 bytes from ipv6 destination ip address */
+ memcpy(hdr_ptr + 2, &(dest_addr[IPV6_ADDR_LEN - 4]), ETH_ALEN - 2);
+
+ hdr_ptr += ETH_ALEN;
+ count += ETH_ALEN;
+
+ /* copy the source MAC */
+ memcpy(hdr_ptr, skb_pkt->data + ETH_ALEN, ETH_ALEN);
+ hdr_ptr += ETH_ALEN;
+ count += ETH_ALEN;
+
+ if (xg_vlan_tx_tag_present(skb_pkt)) {
+ u16 vlan_tci = 0;
+ struct vlan_ethhdr *veth;
+
+ vlan_get_tag(skb_pkt, &vlan_tci);
+ veth = (struct vlan_ethhdr *)(skb->data);
+ veth->h_vlan_proto = htons(ETH_P_8021Q);
+ /* now, the TCI */
+ veth->h_vlan_TCI = htons(vlan_tci);
+ hdr_ptr += VLAN_HLEN;
+ priv->counters[XVE_TX_MCAST_NDP_VLAN_QUERY]++;
+ }
+
+ *hdr_ptr++ = (ETH_P_IPV6 >> 8) & 0xff;
+ count++;
+ *hdr_ptr++ = ETH_P_IPV6 & 0xff;
+ count++;
+
+ /* get the header pointer to populate with ipv6 header */
+ ipv6_hdr = (struct ipv6hdr *)hdr_ptr;
+
+ /* construct ipv6 header */
+ ipv6_hdr->priority = ipv6_hdr_tmp->priority;
+ ipv6_hdr->version = ipv6_hdr_tmp->version;
+ memcpy(ipv6_hdr->flow_lbl, ipv6_hdr_tmp->flow_lbl, 3);
+ ipv6_hdr->payload_len = PAYLOAD_LEN;
+ ipv6_hdr->nexthdr = NEXTHDR_ICMP;
+ ipv6_hdr->hop_limit = ipv6_hdr_tmp->hop_limit;
+ /* get the ipv6 source ip address */
+ memcpy(ipv6_hdr->saddr.s6_addr, source_addr, IPV6_ADDR_LEN);
+ /* construct the multicast dest. ip addr. Solicited Node address */
+ memcpy(&(ipv6_dmac_addr[13]), &(dest_addr[13]), 3);
+ /* get the ipv6 destination ip address */
+ memcpy(ipv6_hdr->daddr.s6_addr, ipv6_dmac_addr, IPV6_ADDR_LEN);
+
+ /* update the header pointer */
+ hdr_ptr += IPV6_HDR_LEN;
+ /* get the header pointer to populate with icmp header */
+ icmp_ndp_hdr = (struct icmp6_ndp *)hdr_ptr;
+
+ /* initialize with ICMP-NDP type */
+ icmp_ndp_hdr->icmp6_type = ICMP_NDP_TYPE;
+
+ /* initialize with ICMP-NDP code */
+ icmp_ndp_hdr->icmp6_code = ICMP_CODE;
+
+ /* get the destination addr from ipv6 header for
+ * ICMP-NDP destination addr */
+ memcpy(&(icmp_ndp_hdr->icmp6_daddr), dest_addr, IPV6_ADDR_LEN);
+
+ /* update icmp header with the optional header */
+ icmp_ndp_hdr->icmp6_option_type = ICMP_OPTION_TYPE;
+ icmp_ndp_hdr->icmp6_option_len = ICMP_OPTION_LEN;
+ /* get the source mac address */
+ memcpy(&(icmp_ndp_hdr->icmp6_option_saddr), skb_pkt->data + ETH_ALEN,
+ ETH_ALEN);
+
+ /* calculate the checksum and update the ICMP-NDP header */
+ icmp_ndp_hdr->icmp6_cksum =
+ csum_ipv6_magic((struct in6_addr *)ipv6_hdr->saddr.s6_addr,
+ (struct in6_addr *)ipv6_hdr->daddr.s6_addr,
+ PAYLOAD_LEN, IPPROTO_ICMPV6,
+ csum_partial(icmp_ndp_hdr, PAYLOAD_LEN, 0));
+
+ skb_reset_network_header(skb);
+ skb->dev = netdev;
+ skb->protocol = htons(ETH_P_IPV6);
+ priv->counters[XVE_TX_MCAST_NDP_QUERY]++;
+ return skb;
+}
+
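+/*
+ * When xve_hbeat_enable is set, build a minimal self-addressed RARP
+ * frame and push it through xve_start_xmit(); this presumably serves
+ * as a keep-alive for the data path.
+ */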
+int xve_send_hbeat(struct xve_dev_priv *priv)
+{
+ struct sk_buff *skb;
+ struct arphdr *arp;
+ unsigned char *arp_ptr, *eth_ptr;
+ int ret;
+
+ if (!xve_hbeat_enable)
+ return 0;
+ skb = alloc_skb(XVE_MIN_PACKET_LEN, GFP_ATOMIC);
+ if (skb == NULL) {
+ priv->counters[XVE_HBEAT_ERR_COUNTER]++;
+ return -ENOMEM;
+ }
+ priv->counters[XVE_DATA_HBEAT_COUNTER]++;
+
+ eth_ptr = (unsigned char *)skb_put(skb, XVE_MIN_PACKET_LEN);
+ ether_addr_copy(eth_ptr, priv->netdev->dev_addr);
+ eth_ptr += ETH_ALEN;
+ ether_addr_copy(eth_ptr, priv->netdev->dev_addr);
+ eth_ptr += ETH_ALEN;
+ *eth_ptr++ = (ETH_P_RARP >> 8) & 0xff;
+ *eth_ptr++ = ETH_P_RARP & 0xff;
+
+ arp = (struct arphdr *)eth_ptr;
+ arp->ar_hrd = htons(priv->netdev->type);
+ arp->ar_hln = priv->netdev->addr_len;
+ arp->ar_pln = 4;
+ arp->ar_op = htons(ARPOP_RREPLY);
+
+ arp_ptr = (unsigned char *)(arp + 1);
+
+ ether_addr_copy(arp_ptr, priv->netdev->dev_addr);
+ arp_ptr += priv->netdev->addr_len;
+ arp_ptr += 4;
+ ether_addr_copy(arp_ptr, priv->netdev->dev_addr);
+
+ skb_reset_network_header(skb);
+ skb->dev = priv->netdev;
+ skb->protocol = htons(ETH_P_RARP);
+
+ ret = xve_start_xmit(skb, priv->netdev);
+ if (ret)
+ dev_kfree_skb_any(skb);
+
+ return 0;
+}
+
+static int xve_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, void *data, int length)
+{
+ struct xsmp_message_header *m_header = data;
+ int ret;
+
+ m_header->length = cpu_to_be16(m_header->length);
+ ret = xcpm_send_message(xsmp_hndl, xve_xsmp_service_id, data, length);
+ if (ret)
+ xcpm_free_msg(data);
+ return ret;
+}
+
+static int xve_xsmp_send_notification(struct xve_dev_priv *priv, u64 vid,
+ int notifycmd)
+{
+ xsmp_cookie_t *xsmp_hndl = priv->xsmp_hndl;
+ int length = sizeof(struct xsmp_message_header) +
+ sizeof(struct xve_xsmp_msg);
+ void *msg;
+ struct xsmp_message_header *header;
+ struct xve_xsmp_msg *xsmp_msg;
+
+ msg = xcpm_alloc_msg(length);
+ if (!msg)
+ return -ENOMEM;
+
+ memset(msg, 0, length);
+
+ header = (struct xsmp_message_header *)msg;
+ xsmp_msg = (struct xve_xsmp_msg *)(msg + sizeof(*header));
+
+ if (notifycmd == XSMP_XVE_OPER_UP) {
+ pr_info("XVE: %s sending updated mtu for %s[mtu %d]\n",
+ __func__, priv->xve_name, priv->admin_mtu);
+ xsmp_msg->vn_mtu = cpu_to_be16(priv->admin_mtu);
+ xsmp_msg->net_id = cpu_to_be32(priv->net_id);
+ }
+
+ header->type = XSMP_MESSAGE_TYPE_XVE;
+ header->length = length;
+
+ xsmp_msg->type = notifycmd;
+ xsmp_msg->length = cpu_to_be16(sizeof(*xsmp_msg));
+ xsmp_msg->resource_id = cpu_to_be64(vid);
+
+ return xve_xsmp_send_msg(xsmp_hndl, msg, length);
+}
+
+static void handle_action_flags(struct xve_dev_priv *priv)
+{
+ if (test_bit(XVE_TRIGGER_NAPI_SCHED, &priv->state)) {
+ xve_data_recv_handler(priv);
+ clear_bit(XVE_TRIGGER_NAPI_SCHED, &priv->state);
+ }
+}
+
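+/*
+ * Periodic state machine, rescheduled from sm_work every
+ * priv->sm_delay milliseconds (see queue_sm_work()). When the
+ * interface is administratively down, the chassis has it down, the IB
+ * link is down or the device is being deleted, it disconnects and
+ * reports the admin state over XSMP; when operationally up it sends
+ * the oper report if needed, starts the multicast side, clears any
+ * pending IB event work and sends a heartbeat when send_hbeat_flag is
+ * still set from the previous pass.
+ */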
+static int xve_state_machine(struct xve_dev_priv *priv)
+{
+ priv->counters[XVE_STATE_MACHINE]++;
+
+ if (!test_bit(XVE_OS_ADMIN_UP, &priv->state) ||
+ !test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) ||
+ test_bit(XVE_IBLINK_DOWN, &priv->state) ||
+ test_bit(XVE_DELETING, &priv->state)) {
+ priv->counters[XVE_STATE_MACHINE_DOWN]++;
+ xve_io_disconnect(priv);
+ if (test_bit(XVE_SEND_ADMIN_STATE, &priv->state)) {
+ clear_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+ xve_xsmp_send_notification(priv,
+ priv->resource_id,
+ XSMP_XVE_UPDATE);
+ }
+ priv->sm_delay = 2000;
+ goto out;
+ }
+
+ if (test_bit(XVE_OPER_UP, &priv->state) &&
+ test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+ !test_bit(XVE_DELETING, &priv->state)) {
+
+ priv->counters[XVE_STATE_MACHINE_UP]++;
+ if (!test_bit(XVE_OPER_REP_SENT, &priv->state))
+ (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
+ priv->resource_id);
+
+ /*Bring ib up (start mcast ) */
+ if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags))
+ xve_ib_dev_up(priv->netdev);
+
+ /* Clear Out standing IB Event */
+ if (test_and_clear_bit(XVE_FLAG_IB_EVENT, &priv->flags)) {
+ xve_debug(DEBUG_MCAST_INFO, priv,
+ "%s Clear Pending IB work [xve %s]\n",
+ __func__, priv->xve_name);
+ xve_queue_work(priv, XVE_WQ_START_MCASTRESTART);
+ }
+
+ handle_action_flags(priv);
+
+ if (priv->send_hbeat_flag) {
+ poll_tx(priv);
+ xve_send_hbeat(priv);
+ }
+ priv->send_hbeat_flag = 1;
+ }
+
+out:
+ return 0;
+}
+
+void queue_age_work(struct xve_dev_priv *priv, int msecs)
+{
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!test_bit(XVE_DELETING, &priv->state) &&
+ test_bit(XVE_OS_ADMIN_UP, &priv->state))
+ xve_queue_dwork(priv, XVE_WQ_START_FWT_AGING,
+ msecs_to_jiffies(msecs));
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void queue_sm_work(struct xve_dev_priv *priv, int msecs)
+{
+ int del = 0;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!test_bit(XVE_DELETING, &priv->state))
+ queue_delayed_work(xve_workqueue, &priv->sm_work,
+ msecs_to_jiffies(msecs));
+ else
+ del = 1;
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ if (del)
+ xve_remove_one(priv);
+}
+
+void xve_start_aging_work(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_FWT_AGING, 1);
+
+ mutex_lock(&priv->mutex);
+ xve_aging_task_machine(priv);
+ mutex_unlock(&priv->mutex);
+
+ if (priv->aging_delay != 0)
+ queue_age_work(priv, 30 * HZ);
+ xve_put_ctx(priv);
+}
+
+void xve_state_machine_work(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ container_of(work, struct xve_dev_priv, sm_work.work);
+
+ mutex_lock(&priv->mutex);
+ xve_state_machine(priv);
+ mutex_unlock(&priv->mutex);
+
+ queue_sm_work(priv, priv->sm_delay);
+}
+
+static void xve_setup(struct net_device *netdev)
+{
+ struct xve_dev_priv *priv = netdev_priv(netdev);
+
+ ether_setup(netdev);
+ priv->netdev = netdev;
+}
+
+static void xve_set_netdev(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ spin_lock_init(&priv->lock);
+ mutex_init(&priv->mutex);
+
+ INIT_LIST_HEAD(&priv->path_list);
+ INIT_LIST_HEAD(&priv->dead_ahs);
+ INIT_LIST_HEAD(&priv->multicast_list);
+ INIT_DELAYED_WORK(&priv->sm_work, xve_state_machine_work);
+ INIT_DELAYED_WORK(&priv->mcast_leave_task, xve_mcast_leave_task);
+ INIT_DELAYED_WORK(&priv->mcast_join_task, xve_mcast_join_task);
+ INIT_DELAYED_WORK(&priv->stale_task, xve_cm_stale_task);
+}
+
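+/*
+ * Configure the net_device for the PVI's vnet_mode: in connected (RC)
+ * mode CM is enabled and TSO/SG (and, when requested, checksum
+ * offload) are turned off; in datagram (UD) mode the MTU is capped at
+ * the IB UD limit and LRO is enabled. Netdev ops, ethtool ops and
+ * NAPI are registered here as well, and the HCA capability flags are
+ * cached in priv->hca_caps.
+ */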
+int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
+{
+ struct ib_device_attr *device_attr;
+ int result = -ENOMEM;
+
+ priv->netdev->watchdog_timeo = 1000 * HZ;
+ priv->netdev->tx_queue_len = xve_sendq_size * 2;
+ priv->netdev->features |=
+ NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM;
+ set_bit(XVE_FLAG_CSUM, &priv->flags);
+
+ if (lro)
+ priv->lro_mode = 1;
+ /* 1 -RC , 2 -UD */
+ if (priv->vnet_mode == 1) {
+ pr_info("XVE: %s Setting RC mode for %s\n", __func__,
+ priv->xve_name);
+ strcpy(priv->mode, "connected(RC)");
+ /* Turn off checksum offload if the module parameter is set */
+ /* TBD if the chassis sends a CHECKSUM BIT */
+ if (xve_no_tx_checksum_offload) {
+ priv->netdev->features &= ~NETIF_F_IP_CSUM;
+ clear_bit(XVE_FLAG_CSUM, &priv->flags);
+ }
+
+ set_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+ priv->netdev->features &= ~(NETIF_F_TSO | NETIF_F_SG);
+ priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+ priv->cm_supported = 1;
+ } else { /* UD */
+ /* MTU will be reset when mcast join happens */
+ strcpy(priv->mode, "datagram(UD)");
+ if (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu))
+ priv->netdev->mtu = XVE_UD_MTU(priv->max_ib_mtu);
+ priv->lro_mode = 1;
+ priv->cm_supported = 0;
+
+ }
+ priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
+
+ if (priv->lro_mode)
+ priv->netdev->features |= NETIF_F_LRO;
+
+ xg_setup_pseudo_device(priv->netdev, hca);
+
+ SET_NETDEV_OPS(priv->netdev, &xve_netdev_ops);
+ xve_set_ethtool_ops(priv->netdev);
+ netif_napi_add(priv->netdev, &priv->napi, xve_poll, napi_weight);
+ if (xve_esx_preregister_setup(priv->netdev))
+ return -EINVAL;
+ xve_lro_setup(priv);
+
+ xve_set_netdev(priv->netdev);
+
+ device_attr = kmalloc(sizeof(*device_attr), GFP_KERNEL);
+
+ if (!device_attr) {
+ pr_warn("%s: allocation of %zu bytes failed\n",
+ hca->name, sizeof(*device_attr));
+ return result;
+ }
+
+ result = ib_query_device(hca, device_attr);
+ if (result) {
+ pr_warn("%s: ib_query_device failed (ret = %d)\n",
+ hca->name, result);
+ kfree(device_attr);
+ return result;
+ }
+ priv->hca_caps = device_attr->device_cap_flags;
+
+ kfree(device_attr);
+
+ return 0;
+}
+
+static int xve_xsmp_send_nack(xsmp_cookie_t xsmp_hndl, void *data, int length,
+ u8 code)
+{
+ void *msg;
+ struct xsmp_message_header *m_header;
+ int total_len = length + sizeof(struct xsmp_message_header);
+ struct xve_xsmp_msg *xsmsgp = (struct xve_xsmp_msg *)data;
+
+ msg = xcpm_alloc_msg(total_len);
+ if (!msg)
+ return -ENOMEM;
+ m_header = (struct xsmp_message_header *)msg;
+ m_header->type = XSMP_MESSAGE_TYPE_XVE;
+ m_header->length = total_len;
+
+ xsmsgp->code = XSMP_XVE_NACK | code;
+ memcpy(msg + sizeof(*m_header), data, length);
+ return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
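+/*
+ * Tear down one xve interface: unregister the IB event handler and
+ * the net_device, wait for the reference count to drain, clean up the
+ * IB resources, notify the chassis (unless we are shutting down) and
+ * finally free the net_device.
+ */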
+void xve_remove_one(struct xve_dev_priv *priv)
+{
+
+ int count = 0;
+
+ pr_info("XVE:%s Removing xve interface %s\n", __func__, priv->xve_name);
+ ib_unregister_event_handler(&priv->event_handler);
+ cancel_delayed_work_sync(&priv->stale_task);
+ rtnl_lock();
+ dev_change_flags(priv->netdev, priv->netdev->flags & ~IFF_UP);
+ rtnl_unlock();
+ vmk_notify_uplink(priv->netdev);
+ unregister_netdev(priv->netdev);
+ pr_info("XVE:%s Unregistered xve interface %s\n", __func__,
+ priv->xve_name);
+ /* Wait for reference count to go zero */
+ while (atomic_read(&priv->ref_cnt) && xve_continue_unload()) {
+ count++;
+ if (count > 20) {
+ pr_info("%s: Waiting for refcnt to become", __func__);
+ pr_info("zero [xve: %s] %d\n",
+ priv->xve_name, atomic_read(&priv->ref_cnt));
+ count = 0;
+ }
+ msleep(1000);
+ }
+ xve_dev_cleanup(priv->netdev);
+ if (!test_bit(XVE_SHUTDOWN, &priv->state)) {
+ /* Ideally need to figure out why userspace ACK isn't working */
+ xve_xsmp_send_notification(priv,
+ priv->resource_id, XSMP_XVE_DELETE);
+ }
+ mutex_lock(&xve_mutex);
+ list_del(&priv->list);
+ mutex_unlock(&xve_mutex);
+ free_netdev(priv->netdev);
+
+ pr_info("XVE:%s Removed xve interface %s\n", __func__, priv->xve_name);
+}
+
+static int xcpm_check_vnic_from_same_pvi(xsmp_cookie_t xsmp_hndl,
+ struct xve_xsmp_msg *xmsgp)
+{
+ struct xve_dev_priv *priv;
+ struct xsmp_session_info xsmp_info;
+ union ib_gid local_gid;
+ struct ib_device *hca;
+ u8 port;
+ char gid_buf[64];
+
+ xcpm_get_xsmp_session_info(xsmp_hndl, &xsmp_info);
+ hca = xsmp_info.ib_device;
+ port = xscore_port_num(xsmp_info.port);
+ (void)ib_query_gid(hca, port, 0, &local_gid);
+
+ mutex_lock(&xve_mutex);
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (xmsgp->net_id == cpu_to_be32(priv->net_id) &&
+ memcmp(priv->local_gid.raw, local_gid.raw,
+ sizeof(local_gid)) == 0) {
+ mutex_unlock(&xve_mutex);
+ print_mgid_buf(gid_buf, local_gid.raw);
+ pr_info("XVE: %s,%s Multiple VNIC on same pvi",
+ xmsgp->xve_name, priv->xve_name);
+ pr_info("%d on same port %s NOT allowed\n",
+ priv->net_id, gid_buf + 8);
+ return -EEXIST;
+ }
+ }
+ mutex_unlock(&xve_mutex);
+ return 0;
+}
+
+static int xve_check_for_hca(xsmp_cookie_t xsmp_hndl)
+{
+ struct ib_device *hca;
+ struct xsmp_session_info xsmp_info;
+
+ xcpm_get_xsmp_session_info(xsmp_hndl, &xsmp_info);
+ hca = xsmp_info.ib_device;
+
+ if (strncmp(hca->name, "mlx4", 4) != 0 &&
+ strncmp(hca->name, "sif0", 4) != 0)
+ return -EEXIST;
+
+ return 0;
+}
+
+struct xve_dev_priv *xve_get_xve_by_vid(u64 resource_id)
+{
+ struct xve_dev_priv *priv;
+
+ mutex_lock(&xve_mutex);
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (priv->resource_id == resource_id) {
+ mutex_unlock(&xve_mutex);
+ return priv;
+ }
+ }
+ mutex_unlock(&xve_mutex);
+
+ return NULL;
+}
+
+struct xve_dev_priv *xve_get_xve_by_name(char *xve_name)
+{
+ struct xve_dev_priv *priv;
+
+ mutex_lock(&xve_mutex);
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (strcmp(priv->xve_name, xve_name) == 0) {
+ mutex_unlock(&xve_mutex);
+ return priv;
+ }
+ }
+ mutex_unlock(&xve_mutex);
+
+ return NULL;
+}
+
+int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state)
+{
+ int ret;
+ char *str = state == XSMP_XVE_OPER_UP ? "UP" : "DOWN";
+
+ pr_info("XVE: %s Sending OPER state [%d] to %s\n",
+ __func__, state, priv->xve_name);
+ if (state == XSMP_XVE_OPER_UP) {
+ set_bit(XVE_OPER_REP_SENT, &priv->state);
+ set_bit(XVE_PORT_LINK_UP, &priv->state);
+ } else {
+ clear_bit(XVE_OPER_REP_SENT, &priv->state);
+ clear_bit(XVE_PORT_LINK_UP, &priv->state);
+ }
+
+ ret = xve_xsmp_send_notification(priv, vid, state);
+ XSMP_INFO("XVE: %s:Oper %s notification for ", __func__, str);
+ XSMP_INFO("resource_id: 0x%Lx state %d\n", vid, state);
+
+ return ret;
+}
+
+static void xve_set_oper_up_state(struct xve_dev_priv *priv)
+{
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_OPER_UP, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int handle_admin_state_change(struct xve_dev_priv *priv,
+ struct xve_xsmp_msg *xmsgp)
+{
+ if (xmsgp->admin_state) {
+ XSMP_INFO("%s: VNIC %s Admin state up message\n", __func__,
+ priv->xve_name);
+ if (!test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
+ priv->counters[XVE_ADMIN_UP_COUNTER]++;
+ set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+ set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+ /*
+ * We won't get a notification from the XT as with
+ * VNIC, so set OPER_UP here
+ */
+ xve_set_oper_up_state(priv);
+ }
+ } else { /* Admin Down */
+ XSMP_INFO("%s: VNIC %s Admin state down message\n",
+ __func__, priv->xve_name);
+ if (test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
+ priv->counters[XVE_ADMIN_DOWN_COUNTER]++;
+ clear_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+ set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+ }
+ }
+ return 0;
+}
+
+void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id)
+{
+ struct xve_dev_priv *priv;
+ unsigned long flags = 0;
+
+ priv = xve_get_xve_by_vid(resource_id);
+ if (!priv)
+ return;
+ spin_lock_irqsave(&priv->lock, flags);
+
+ priv->counters[XVE_OPER_REQ_COUNTER]++;
+ xve_xsmp_send_oper_state(priv, resource_id,
+ test_bit(XVE_OPER_UP,
+ &priv->state) ? XSMP_XVE_OPER_UP :
+ XSMP_XVE_OPER_DOWN);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int xve_xsmp_send_ack(struct xve_dev_priv *priv,
+ struct xve_xsmp_msg *xmsgp)
+{
+ void *msg;
+ struct xsmp_message_header *m_header;
+ int total_len = sizeof(*xmsgp) + sizeof(*m_header);
+ xsmp_cookie_t xsmp_hndl = priv->xsmp_hndl;
+
+ msg = xcpm_alloc_msg(total_len);
+ if (!msg)
+ return -ENOMEM;
+ m_header = (struct xsmp_message_header *)msg;
+ m_header->type = XSMP_MESSAGE_TYPE_XVE;
+ m_header->length = total_len;
+
+ xmsgp->code = 0;
+ xmsgp->vn_mtu = cpu_to_be16(priv->admin_mtu);
+ xmsgp->net_id = cpu_to_be32(priv->net_id);
+ pr_info("XVE: %s ACK back with admin mtu ", __func__);
+ pr_info("%d for %s", xmsgp->vn_mtu, priv->xve_name);
+ pr_info("[netid %d ]\n", xmsgp->net_id);
+
+ memcpy(msg + sizeof(*m_header), xmsgp, sizeof(*xmsgp));
+
+ return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+/*
+ * Handle an XSMP install message from the chassis: validate the HCA
+ * and reject duplicate resource IDs, names and VNICs on the same PVI,
+ * then allocate and register a new xve net_device, initialize its IB
+ * resources and ACK (or NACK with an error code) back over XSMP.
+ */
+
+static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
+ void *data, int len)
+{
+ struct net_device *netdev;
+ struct xve_dev_priv *priv;
+ char xve_name[XVE_MAX_NAME_SIZE];
+ int ret = 0;
+ int update_state = 0;
+ int result = -ENOMEM;
+ struct ib_device *hca;
+ u8 port;
+ __be32 net_id_be;
+ u8 ecode = 0;
+
+ if (xve_check_for_hca(xsmp_hndl) != 0) {
+ pr_info("Warning !!!!! Unsupported HCA card for xve ");
+ pr_info("interface - %s XSF feature is only ", xmsgp->xve_name);
+ pr_info("supported on Connect-X HCA cards !!!!!!!");
+ ret = -EEXIST;
+ goto dup_error;
+ }
+
+ priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (priv) {
+ /*
+ * Duplicate VID, send ACK, send oper state update
+ */
+ XSMP_ERROR
+ ("%s: Duplicate XVE install message name: %s, VID=0x%llx\n",
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ret = -EEXIST;
+ update_state = 1;
+ priv->xsmp_hndl = xsmp_hndl;
+ goto send_ack;
+ }
+
+ if (xcpm_check_duplicate_names
+ (xsmp_hndl, xmsgp->xve_name, XSMP_MESSAGE_TYPE_VNIC) != 0) {
+ pr_info("%s Duplicate name %s\n", __func__, xmsgp->xve_name);
+ ret = -EEXIST;
+ goto dup_error;
+ }
+
+ if (xcpm_check_vnic_from_same_pvi(xsmp_hndl, xmsgp) != 0) {
+ ret = -EEXIST;
+ goto dup_error;
+ }
+
+ strncpy(xve_name, xmsgp->xve_name, sizeof(xve_name) - 1);
+ xve_name[sizeof(xve_name) - 1] = '\0';
+
+ priv = xve_get_xve_by_name(xve_name);
+ if (priv) {
+ XSMP_ERROR("%s: Duplicate name: %s, VID=0x%llx\n",
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ret = -EEXIST;
+ goto dup_error;
+ }
+
+ netdev =
+ alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, &xve_setup);
+ if (netdev == NULL) {
+ XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ ret = -ENOMEM;
+ ecode = XVE_NACK_ALLOCATION_ERROR;
+ goto dup_error;
+ }
+ priv = netdev_priv(netdev);
+
+ pr_info("XVE: %s Installing xve %s - ", __func__, xmsgp->xve_name);
+ pr_info("resource id %llx", be64_to_cpu(xmsgp->resource_id));
+ pr_info("priv DS %p\n", priv);
+
+ xcpm_get_xsmp_session_info(xsmp_hndl, &priv->xsmp_info);
+ hca = priv->xsmp_info.ib_device;
+ port = xscore_port_num(priv->xsmp_info.port);
+ /* Parse PVI parameters */
+ priv->vnet_mode = (xmsgp->vnet_mode);
+ priv->net_id = be32_to_cpu(xmsgp->net_id);
+ priv->netdev->mtu = be16_to_cpu(xmsgp->vn_mtu);
+ priv->resource_id = be64_to_cpu(xmsgp->resource_id);
+ priv->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+ priv->xsmp_hndl = xsmp_hndl;
+ priv->sm_delay = 1000;
+ priv->aging_delay = xve_aging_timeout * HZ;
+ strcpy(priv->xve_name, xmsgp->xve_name);
+ strcpy(priv->proc_name, priv->xve_name);
+ net_id_be = cpu_to_be32(priv->net_id);
+
+ /* Always set chassis ADMIN up by default */
+ set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+
+ if (!ib_query_port(hca, port, &priv->port_attr))
+ priv->max_ib_mtu = ib_mtu_enum_to_int(priv->port_attr.max_mtu);
+ else {
+ pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
+ goto device_init_failed;
+ }
+
+ memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
+ pr_info("XVE: %s adding vnic %s ", __func__, priv->xve_name);
+ pr_info("net_id %d vnet_mode %d", priv->net_id, priv->vnet_mode);
+ pr_info("port %d net_id_be %d\n", port, net_id_be);
+ memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
+
+ result = ib_query_pkey(hca, port, 0, &priv->pkey);
+ if (result) {
+ pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+ hca->name, port, result);
+ goto device_init_failed;
+ }
+
+ if (xve_set_dev_features(priv, hca))
+ goto device_init_failed;
+ /*
+ * Set the full membership bit, so that we join the right
+ * broadcast group, etc.
+ */
+ priv->pkey |= 0x8000;
+
+ result = ib_query_gid(hca, port, 0, &priv->local_gid);
+
+ if (result) {
+ pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
+ hca->name, port, result);
+ goto device_init_failed;
+ } else {
+ u64 m;
+
+ m = xmsgp->mac_high;
+ m = m << 32 | xmsgp->mac_low;
+ m = be64_to_cpu(m);
+ memcpy(priv->netdev->dev_addr, (u8 *) (&m) + 2, ETH_ALEN);
+ priv->mac = m;
+ }
+
+ result = xve_dev_init(priv->netdev, hca, port);
+ if (result != 0) {
+ pr_warn
+ ("%s: failed to initialize port %d net_id %d (ret = %d)\n",
+ hca->name, port, priv->net_id, result);
+ goto device_init_failed;
+ }
+
+ INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, xve_event);
+ result = ib_register_event_handler(&priv->event_handler);
+ if (result < 0) {
+ pr_warn("%s: ib_register_event_handler failed for ", hca->name);
+ pr_warn("port %d net_id %d (ret = %d)\n",
+ port, priv->net_id, result);
+ goto event_failed;
+ }
+
+ xve_fwt_init(&priv->xve_fwt);
+
+ if (xve_add_proc_entry(priv)) {
+ pr_err("XVE; %s procfs error name: %s, VID=0x%llx\n",
+ __func__, priv->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ goto proc_error;
+ }
+
+ result = register_netdev(priv->netdev);
+ if (result) {
+ pr_warn("%s: couldn't register xve %d net_id %d; error %d\n",
+ hca->name, port, priv->net_id, result);
+ goto register_failed;
+ }
+
+ handle_carrier_state(priv, 0);
+ if (xve_esx_postregister_setup(priv->netdev)) {
+ ecode = XVE_NACK_ALLOCATION_ERROR;
+ goto sysfs_failed;
+ }
+
+ mutex_lock(&xve_mutex);
+ list_add_tail(&priv->list, &xve_dev_list);
+ mutex_unlock(&xve_mutex);
+
+ xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+
+ queue_sm_work(priv, 0);
+
+ pr_info("%s Successfully created xve [%s]\n", __func__,
+ xmsgp->xve_name);
+
+send_ack:
+ ret = xve_xsmp_send_ack(priv, xmsgp);
+ if (ret) {
+ XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s, VID=0x%llx\n",
+ __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
+ if (update_state) {
+ pr_info("XVE: %s sending oper state to chassis for %s id 0x%llx\n",
+ __func__, priv->xve_name, priv->resource_id);
+ (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
+ priv->resource_id);
+ }
+
+ return 0;
+
+sysfs_failed:
+ unregister_netdev(priv->netdev);
+register_failed:
+ xve_remove_proc_entry(priv);
+proc_error:
+ ib_unregister_event_handler(&priv->event_handler);
+event_failed:
+ xve_dev_cleanup(priv->netdev);
+device_init_failed:
+ free_netdev(priv->netdev);
+dup_error:
+ (void)xve_xsmp_send_nack(xsmp_hndl, xmsgp, sizeof(*xmsgp), ecode);
+ return ret;
+
+}
+
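+/*
+ * Forward a copy of the XSMP payload to the user-space xsigod daemon.
+ * On a successful send xcpm owns the buffer; it is freed here only if
+ * the send fails.
+ */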
+static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data, int len)
+{
+ void *tmsg;
+
+ tmsg = xcpm_alloc_msg(len);
+ if (!tmsg)
+ return;
+ memcpy(tmsg, data, len);
+ if (xcpm_send_msg_xsigod(xsmp_hndl, tmsg, len))
+ xcpm_free_msg(tmsg);
+}
+
+static void xve_handle_ip_req(xsmp_cookie_t xsmp_hndl, u8 *data, int len)
+{
+ struct xve_xsmp_vlanip_msg *msgp =
+ (struct xve_xsmp_vlanip_msg *)(data +
+ sizeof(struct xsmp_message_header));
+ struct xve_dev_priv *priv;
+
+ priv = xve_get_xve_by_vid(be64_to_cpu(msgp->resource_id));
+ if (!priv) {
+ xve_counters[XVE_VNIC_DEL_NOVID_COUNTER]++;
+ return;
+ }
+ XSMP_INFO("%s:XSMP message type VLAN IP for %s\n", __func__,
+ priv->xve_name);
+ strcpy(msgp->ifname, priv->xve_name);
+ msgp->mp_flag = cpu_to_be16(priv->mp_flag);
+
+ /*
+ * Punt this message to userspace
+ */
+ xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+}
+
+static void xve_xsmp_send_stats(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+ struct xve_dev_priv *priv;
+ struct xve_xsmp_stats_msg *msgp =
+ (struct xve_xsmp_stats_msg *)(data +
+ sizeof(struct xsmp_message_header));
+
+ void *msg;
+ struct xsmp_message_header *m_header;
+
+ priv = xve_get_xve_by_vid(be64_to_cpu(msgp->resource_id));
+ if (!priv) {
+ xve_test("XVE: %s priv not found for %llx\n",
+ __func__, be64_to_cpu(msgp->resource_id));
+ return;
+ }
+
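+ /*
+ * The reply reuses the request layout: the current netdev counters
+ * are written into the received stats structure and echoed back on
+ * the same XSMP session.
+ */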
+ msg = xcpm_alloc_msg(length);
+ if (!msg)
+ return;
+ m_header = (struct xsmp_message_header *)msg;
+ m_header->type = XSMP_MESSAGE_TYPE_XVE;
+ m_header->length = length;
+
+ /* Clear stats */
+ if (msgp->bitmask == 0)
+ memset(&priv->stats, 0, sizeof(struct net_device_stats));
+ msgp->rx_packets = priv->stats.rx_packets;
+ msgp->rx_bytes = priv->stats.rx_bytes;
+ msgp->rx_errors = priv->stats.rx_errors;
+ msgp->rx_drops = priv->stats.rx_dropped;
+
+ msgp->tx_packets = priv->stats.tx_packets;
+ msgp->tx_bytes = priv->stats.tx_bytes;
+ msgp->tx_errors = priv->stats.tx_errors;
+ msgp->tx_drops = priv->stats.tx_dropped;
+
+ memcpy(msg + sizeof(*m_header), msgp, sizeof(*msgp));
+ xve_xsmp_send_msg(priv->xsmp_hndl, msg, length);
+
+}
+
+static int xve_xsmp_update(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp)
+{
+ u32 bitmask = be32_to_cpu(xmsgp->bitmask);
+ struct xve_dev_priv *xvep;
+ int ret = 0;
+ int send_ack = 1;
+
+ xvep = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!xvep) {
+ XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+ __func__, be64_to_cpu(xmsgp->resource_id));
+ return -EINVAL;
+ }
+
+ XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, xvep->xve_name,
+ bitmask);
+
+ mutex_lock(&xvep->mutex);
+
+ if (bitmask & XVE_UPDATE_ADMIN_STATE) {
+ ret = handle_admin_state_change(xvep, xmsgp);
+ /*
+ * Ack will be sent once QP's are brought down
+ */
+ send_ack = 0;
+ }
+
+ if (send_ack) {
+ ret = xve_xsmp_send_ack(xvep, xmsgp);
+ if (ret)
+ XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s\n"
+ "VID=0x%llx\n", __func__, xmsgp->xve_name,
+ be64_to_cpu(xmsgp->resource_id));
+ }
+ mutex_unlock(&xvep->mutex);
+
+ return ret;
+}
+
+/*
+ * We set the DELETING bit and let sm_work thread handle delete
+ */
+static void xve_handle_del_message(xsmp_cookie_t xsmp_hndl,
+ struct xve_xsmp_msg *xmsgp)
+{
+ struct xve_dev_priv *priv;
+ unsigned long flags;
+
+ priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+ if (!priv) {
+ XSMP_INFO("XVE: %s priv not found for %s\n",
+ __func__, xmsgp->xve_name);
+ return;
+ }
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_DELETING, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+}
+
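+/*
+ * XSMP control-message dispatcher: validates the header length and
+ * type, then fans out to the install/delete/update/oper-req/stats
+ * handlers above.
+ */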
+static void handle_xve_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data,
+ int length)
+{
+ int hlen;
+ struct xsmp_message_header *header = (struct xsmp_message_header *)data;
+ struct xve_xsmp_msg *xmsgp =
+ (struct xve_xsmp_msg *)(data + sizeof(*header));
+
+ if (length < sizeof(*header))
+ return;
+ hlen = be16_to_cpu(header->length);
+ if (hlen > length)
+ return;
+ if (header->type != XSMP_MESSAGE_TYPE_XVE)
+ return;
+ XSMP_INFO("%s: XSMP message type: %d\n", __func__, xmsgp->type);
+
+ switch (xmsgp->type) {
+ case XSMP_XVE_VLANIP:
+ xve_handle_ip_req(xsmp_hndl, data, length);
+ break;
+ case XSMP_XVE_INFO_REQUEST:
+ break;
+ case XSMP_XVE_INSTALL:
+ xve_counters[XVE_VNIC_INSTALL_COUNTER]++;
+ xve_xsmp_install(xsmp_hndl, xmsgp, data, length);
+ break;
+ case XSMP_XVE_DELETE:
+ xve_counters[XVE_VNIC_DEL_COUNTER]++;
+ xve_handle_del_message(xsmp_hndl, xmsgp);
+ break;
+ case XSMP_XVE_UPDATE:
+ xve_counters[XVE_VNIC_UPDATE_COUNTER]++;
+ xve_xsmp_update(xsmp_hndl, xmsgp);
+ break;
+ case XSMP_XVE_OPER_REQ:
+ xve_counters[XVE_VNIC_OPER_REQ_COUNTER]++;
+ (void)xve_xsmp_handle_oper_req(xsmp_hndl,
+ be64_to_cpu(xmsgp->resource_id));
+ break;
+ case XSMP_XVE_STATS:
+ xve_counters[XVE_VNIC_STATS_COUNTER]++;
+ (void)xve_xsmp_send_stats(xsmp_hndl, data, length);
+ break;
+ default:
+ break;
+ }
+}
+
+static void handle_xve_xsmp_messages_work(struct work_struct *work)
+{
+ struct xve_work *xwork = container_of(work, struct xve_work,
+ work);
+
+ (void)handle_xve_xsmp_messages(xwork->xsmp_hndl, xwork->msg,
+ xwork->len);
+ kfree(xwork->msg);
+ kfree(xwork);
+}
+
+/*
+ * Called from thread context
+ */
+static void xve_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *msg, int length)
+{
+ struct xve_work *work;
+ unsigned long flags = 0;
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ kfree(msg);
+ return;
+ }
+ INIT_WORK(&work->work, handle_xve_xsmp_messages_work);
+ work->xsmp_hndl = xsmp_hndl;
+ work->msg = msg;
+ work->len = length;
+
+ spin_lock_irqsave(&xve_lock, flags);
+ queue_work(xve_workqueue, &work->work);
+ spin_unlock_irqrestore(&xve_lock, flags);
+
+}
+
+/*
+ * Needs to be called with mutex lock held
+ */
+static void xve_wait_for_removal(xsmp_cookie_t xsmp_hndl)
+{
+ int is_pres;
+ struct xve_dev_priv *priv;
+
+ while (1) {
+ is_pres = 0;
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl))
+ is_pres = 1;
+ }
+ if (is_pres) {
+ mutex_unlock(&xve_mutex);
+ msleep(100);
+ mutex_lock(&xve_mutex);
+ } else
+ break;
+ }
+}
+
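+/*
+ * Session-level events from xscore: port up/down toggles the IB-link
+ * state of every vnic on the session, while device removal marks the
+ * vnics for deletion and waits until they are gone.
+ */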
+static void xve_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+ struct xve_dev_priv *priv;
+ unsigned long flags;
+
+ mutex_lock(&xve_mutex);
+
+ switch (event) {
+ case XSCORE_PORT_UP:
+ case XSCORE_PORT_DOWN:
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl)) {
+ if (event == XSCORE_PORT_DOWN) {
+ set_bit(XVE_IBLINK_DOWN, &priv->state);
+ priv->counters
+ [XVE_IBLINK_DOWN_COUNTER]++;
+ } else {
+ clear_bit(XVE_IBLINK_DOWN,
+ &priv->state);
+ xve_set_oper_up_state(priv);
+ priv->counters[XVE_IBLINK_UP_COUNTER]++;
+ }
+ }
+ }
+ break;
+ case XSCORE_DEVICE_REMOVAL:
+ xve_counters[XVE_DEVICE_REMOVAL_COUNTER]++;
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl)) {
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_DELETING, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+ }
+ /*
+ * Now wait for all the vnics to be deleted
+ */
+ xve_wait_for_removal(xsmp_hndl);
+ break;
+ case XSCORE_CONN_CONNECTED:
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl))
+ priv->xsmp_hndl = xsmp_hndl;
+ }
+ break;
+ default:
+ break;
+ }
+
+ mutex_unlock(&xve_mutex);
+}
+
+static int xve_xsmp_callout_handler(char *name)
+{
+ struct xve_dev_priv *priv;
+ int ret = 0;
+
+ mutex_lock(&xve_mutex);
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ /* CHECK for duplicate name */
+ if (strcmp(priv->xve_name, name) == 0) {
+ ret = -EINVAL;
+ break;
+ }
+ }
+ mutex_unlock(&xve_mutex);
+ return ret;
+}
+
+int xve_xsmp_init(void)
+{
+ struct xsmp_service_reg_info service_info = {
+ .receive_handler = xve_receive_handler,
+ .event_handler = xve_xsmp_event_handler,
+ .callout_handler = xve_xsmp_callout_handler,
+ .ctrl_message_type = XSMP_MESSAGE_TYPE_XVE,
+ .resource_flag_index = RESOURCE_FLAG_INDEX_XVE
+ };
+
+ xve_xsmp_service_id = xcpm_register_service(&service_info);
+ if (xve_xsmp_service_id < 0)
+ return xve_xsmp_service_id;
+ return 0;
+}
+
+void xve_xsmp_exit(void)
+{
+ (void)xcpm_unregister_service(xve_xsmp_service_id);
+ xve_xsmp_service_id = -1;
+}
+
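+/*
+ * Module init: clamp the send/receive ring sizes to powers of two
+ * within the supported range, then bring up procfs, the lookup
+ * tables, the private workqueues and the XSMP/IB SA registrations.
+ */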
+static int __init xve_init_module(void)
+{
+ int ret;
+
+ INIT_LIST_HEAD(&xve_dev_list);
+ spin_lock_init(&xve_lock);
+
+ mutex_init(&xve_mutex);
+
+ xve_recvq_size = roundup_pow_of_two(xve_recvq_size);
+ xve_recvq_size = min(xve_recvq_size, XVE_MAX_QUEUE_SIZE);
+ xve_recvq_size = max(xve_recvq_size, XVE_MIN_QUEUE_SIZE);
+
+ xve_sendq_size = roundup_pow_of_two(xve_sendq_size);
+ xve_sendq_size = min(xve_sendq_size, XVE_MAX_QUEUE_SIZE);
+ xve_sendq_size = max(xve_sendq_size, max(2 * MAX_SEND_CQE,
+ XVE_MIN_QUEUE_SIZE));
+ /*
+ * When copying small received packets, we only copy from the
+ * linear data part of the SKB, so we rely on this condition.
+ */
+ BUILD_BUG_ON(XVE_CM_COPYBREAK > XVE_CM_HEAD_SIZE);
+
+ ret = xve_create_procfs_root_entries();
+ if (ret)
+ return ret;
+
+ ret = xve_tables_init();
+ if (ret)
+ goto err_fs;
+
+ /*
+ * We create our own workqueue mainly because we want to be
+ * able to flush it when devices are being removed. We can't
+ * use schedule_work()/flush_scheduled_work() because both
+ * unregister_netdev() and linkwatch_event take the rtnl lock,
+ * so flush_scheduled_work() can deadlock during device
+ * removal.
+ */
+ xve_workqueue = create_singlethread_workqueue("xve");
+ if (!xve_workqueue) {
+ ret = -ENOMEM;
+ goto err_tables;
+ }
+
+ xve_taskqueue = create_singlethread_workqueue("xve_taskq");
+ if (!xve_taskqueue) {
+ ret = -ENOMEM;
+ destroy_workqueue(xve_workqueue);
+ goto err_tables;
+ }
+
+ xve_xsmp_init();
+ /*
+ * Now register with IB framework
+ */
+ ib_sa_register_client(&xve_sa_client);
+ return 0;
+
+err_tables:
+ xve_tables_exit();
+
+err_fs:
+ xve_remove_procfs_root_entries();
+ return ret;
+}
+
+static void __exit xve_cleanup_module(void)
+{
+ struct xve_dev_priv *priv;
+ unsigned long flags = 0;
+
+ pr_info("XVE: %s Remove module\n", __func__);
+ xve_xsmp_exit();
+
+ mutex_lock(&xve_mutex);
+
+ list_for_each_entry(priv, &xve_dev_list, list) {
+ spin_lock_irqsave(&priv->lock, flags);
+ set_bit(XVE_DELETING, &priv->state);
+ set_bit(XVE_SHUTDOWN, &priv->state);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+
+ while (!list_empty(&xve_dev_list)) {
+ mutex_unlock(&xve_mutex);
+ msleep(100);
+ mutex_lock(&xve_mutex);
+ }
+ mutex_unlock(&xve_mutex);
+ ib_sa_unregister_client(&xve_sa_client);
+ xve_tables_exit();
+ mutex_lock(&xve_mutex);
+ flush_workqueue(xve_workqueue);
+ destroy_workqueue(xve_workqueue);
+ flush_workqueue(xve_taskqueue);
+ destroy_workqueue(xve_taskqueue);
+ mutex_unlock(&xve_mutex);
+
+ xve_remove_procfs_root_entries();
+ mutex_destroy(&xve_mutex);
+ pr_info("XVE: %s module remove success\n", __func__);
+}
+
+module_init(xve_init_module);
+module_exit(xve_cleanup_module);
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int rate_selector = IB_SA_EQ;
+module_param(rate_selector, int, 0444);
+MODULE_PARM_DESC(rate_selector, "Multicast rate selector");
+
+static int mcast_rate = IB_RATE_10_GBPS;
+module_param(mcast_rate, int, 0444);
+MODULE_PARM_DESC(mcast_rate, "Multicast rate during join/create");
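+
+/*
+ * The rate selector and rate above are only applied when a multicast
+ * group is created (xve_mcast_join with create != 0); joins to an
+ * existing group inherit the group's parameters.
+ */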
+
+static DEFINE_MUTEX(mcast_mutex);
+
+struct xve_mcast_iter {
+ struct net_device *dev;
+ union ib_gid mgid;
+ unsigned long created;
+ unsigned int queuelen;
+ unsigned int complete;
+ unsigned int send_only;
+};
+
+static void xve_mcast_free(struct xve_mcast *mcast)
+{
+ struct net_device *dev = mcast->netdev;
+ int tx_dropped = 0;
+
+ xve_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+ mcast->mcmember.mgid.raw);
+
+ if (mcast->ah)
+ xve_put_ah(mcast->ah);
+
+ while (!skb_queue_empty(&mcast->pkt_queue)) {
+ ++tx_dropped;
+ dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+ }
+
+ netif_tx_lock_bh(dev);
+ dev->stats.tx_dropped += tx_dropped;
+ ((struct xve_dev_priv *)netdev_priv(dev))->stats.tx_dropped +=
+ tx_dropped;
+ netif_tx_unlock_bh(dev);
+
+ kfree(mcast);
+}
+
+static struct xve_mcast *xve_mcast_alloc(struct net_device *dev, int can_sleep)
+{
+ struct xve_mcast *mcast;
+
+ mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ if (!mcast)
+ return NULL;
+
+ mcast->netdev = dev;
+ mcast->created = jiffies;
+ mcast->used = jiffies;
+ mcast->backoff = 1;
+
+ INIT_LIST_HEAD(&mcast->list);
+ skb_queue_head_init(&mcast->pkt_queue);
+
+ return mcast;
+}
+
+static struct xve_mcast *__xve_mcast_find(struct net_device *dev, void *mgid)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct rb_node *n = priv->multicast_tree.rb_node;
+
+ while (n) {
+ struct xve_mcast *mcast;
+ int ret;
+
+ mcast = rb_entry(n, struct xve_mcast, rb_node);
+
+ ret = memcmp(mgid, mcast->mcmember.mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ return mcast;
+ }
+
+ return NULL;
+}
+
+static int __xve_mcast_add(struct net_device *dev, struct xve_mcast *mcast)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
+
+ while (*n) {
+ struct xve_mcast *tmcast;
+ int ret;
+
+ pn = *n;
+ tmcast = rb_entry(pn, struct xve_mcast, rb_node);
+
+ ret =
+ memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = &pn->rb_left;
+ else if (ret > 0)
+ n = &pn->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ rb_link_node(&mcast->rb_node, pn, n);
+ rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
+
+ return 0;
+}
+
+static int xve_mcast_join_finish(struct xve_mcast *mcast,
+ struct ib_sa_mcmember_rec *mcmember)
+{
+ struct net_device *dev = mcast->netdev;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_ah *ah;
+ int ret;
+ int set_qkey = 0;
+
+ mcast->mcmember = *mcmember;
+
+ /* Set the cached Q_Key before we attach if it's the broadcast group */
+ if (!memcmp(mcast->mcmember.mgid.raw, priv->bcast_mgid.raw,
+ sizeof(union ib_gid))) {
+ spin_lock_irq(&priv->lock);
+ if (!priv->broadcast) {
+ spin_unlock_irq(&priv->lock);
+ return -EAGAIN;
+ }
+ priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
+ spin_unlock_irq(&priv->lock);
+ priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+ set_qkey = 1;
+ }
+
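+ /*
+ * Send-only groups are never attached to the UD QP; only full-member
+ * groups are attached so that inbound multicast is delivered to us.
+ */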
+ if (!test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+ if (test_and_set_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ xve_warn(priv,
+ "multicast group %pI6 already attached\n",
+ mcast->mcmember.mgid.raw);
+
+ return 0;
+ }
+
+ ret = xve_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
+ &mcast->mcmember.mgid, set_qkey);
+ if (ret < 0) {
+ xve_warn(priv,
+ "couldn't attach QP to multicast group %pI6\n",
+ mcast->mcmember.mgid.raw);
+
+ clear_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags);
+ return ret;
+ }
+ }
+
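+ /*
+ * Build an address handle from the member record so any queued
+ * packets can be transmitted to the group right away.
+ */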
+ {
+ struct ib_ah_attr av = {
+ .dlid = be16_to_cpu(mcast->mcmember.mlid),
+ .port_num = priv->port,
+ .sl = mcast->mcmember.sl,
+ .ah_flags = IB_AH_GRH,
+ .static_rate = mcast->mcmember.rate,
+ .grh = {
+ .flow_label =
+ be32_to_cpu(mcast->mcmember.flow_label),
+ .hop_limit = mcast->mcmember.hop_limit,
+ .sgid_index = 0,
+ .traffic_class = mcast->mcmember.traffic_class}
+ };
+ av.grh.dgid = mcast->mcmember.mgid;
+
+ ah = xve_create_ah(dev, priv->pd, &av);
+ if (!ah) {
+ xve_warn(priv, "ib_address_create failed\n");
+ } else {
+ spin_lock_irq(&priv->lock);
+ mcast->ah = ah;
+ spin_unlock_irq(&priv->lock);
+
+ xve_dbg_mcast(priv,
+ "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+ mcast->mcmember.mgid.raw, mcast->ah->ah,
+ be16_to_cpu(mcast->mcmember.mlid),
+ mcast->mcmember.sl);
+ }
+ }
+
+ /* actually send any queued packets */
+ netif_tx_lock_bh(dev);
+ while (!skb_queue_empty(&mcast->pkt_queue)) {
+ struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
+
+ netif_tx_unlock_bh(dev);
+ skb->dev = dev;
+ if (dev_queue_xmit(skb))
+ xve_warn(priv,
+ "dev_queue_xmit failed to requeue packet\n");
+ netif_tx_lock_bh(dev);
+ }
+ netif_tx_unlock_bh(dev);
+
+ return 0;
+}
+
+static int xve_mcast_sendonly_join_complete(int status,
+ struct ib_sa_multicast *multicast)
+{
+ struct xve_mcast *mcast = multicast->context;
+ struct net_device *dev = mcast->netdev;
+
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
+ if (!status)
+ status = xve_mcast_join_finish(mcast, &multicast->rec);
+
+ if (status) {
+ if (mcast->logcount++ < 20)
+ xve_dbg_mcast(netdev_priv(dev),
+ "%s multicast join failed for %pI6, status %d\n",
+ __func__, mcast->mcmember.mgid.raw,
+ status);
+
+ /* Flush out any queued packets */
+ netif_tx_lock_bh(dev);
+ while (!skb_queue_empty(&mcast->pkt_queue)) {
+ INC_TX_DROP_STATS(((struct xve_dev_priv *)
+ netdev_priv(dev)), dev);
+ dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+ }
+ netif_tx_unlock_bh(dev);
+ /* Clear the busy flag so we try again */
+ status = test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+ }
+ return status;
+}
+
+static int xve_mcast_sendonly_join(struct xve_mcast *mcast)
+{
+ struct net_device *dev = mcast->netdev;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_sa_mcmember_rec rec = {
+ .join_state = 1
+ };
+ ib_sa_comp_mask comp_mask;
+ int ret = 0;
+
+ if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags)) {
+ xve_dbg_mcast(priv,
+ "device shutting down, no multicast joins\n");
+ return -ENODEV;
+ }
+
+ if (test_and_set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
+ xve_dbg_mcast(priv, "multicast entry busy, skipping\n");
+ return -EBUSY;
+ }
+
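+ /*
+ * Base fields first; when the broadcast group is already known, its
+ * qkey, MTU, rate, SL, flow label and hop limit are reused below so
+ * the SA accepts the send-only join as compatible with the group.
+ */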
+ rec.mgid = mcast->mcmember.mgid;
+ rec.port_gid = priv->local_gid;
+ rec.pkey = cpu_to_be16(priv->pkey);
+
+ comp_mask =
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+ if (priv->broadcast) {
+ comp_mask |=
+ IB_SA_MCMEMBER_REC_QKEY |
+ IB_SA_MCMEMBER_REC_MTU_SELECTOR |
+ IB_SA_MCMEMBER_REC_MTU |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
+ IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+ IB_SA_MCMEMBER_REC_RATE |
+ IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_HOP_LIMIT;
+
+ rec.qkey = priv->broadcast->mcmember.qkey;
+ rec.mtu_selector = IB_SA_EQ;
+ rec.mtu = priv->broadcast->mcmember.mtu;
+ rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+ rec.rate_selector = IB_SA_EQ;
+ rec.rate = priv->broadcast->mcmember.rate;
+ rec.sl = priv->broadcast->mcmember.sl;
+ rec.flow_label = priv->broadcast->mcmember.flow_label;
+ rec.hop_limit = priv->broadcast->mcmember.hop_limit;
+ }
+ xve_dbg_mcast(priv, "%s Joining send only join mtu %d\n", __func__,
+ rec.mtu);
+
+ mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca,
+ priv->port, &rec,
+ comp_mask,
+ GFP_ATOMIC,
+ xve_mcast_sendonly_join_complete,
+ mcast);
+ if (IS_ERR(mcast->mc)) {
+ ret = PTR_ERR(mcast->mc);
+ clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+ xve_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret);
+ } else {
+ xve_dbg_mcast(priv,
+ "no multicast record for %pI6, starting join\n",
+ mcast->mcmember.mgid.raw);
+ }
+
+ return ret;
+}
+
+static int xve_mcast_join_complete(int status,
+ struct ib_sa_multicast *multicast)
+{
+ struct xve_mcast *mcast = multicast->context;
+ struct net_device *dev = mcast->netdev;
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+ mcast->mcmember.mgid.raw, status);
+
+ /* We trap for port events ourselves. */
+ if (status == -ENETRESET)
+ return 0;
+
+ if (!status)
+ status = xve_mcast_join_finish(mcast, &multicast->rec);
+
+ if (!status) {
+ mcast->backoff = 1;
+ mutex_lock(&mcast_mutex);
+ if (test_bit(XVE_MCAST_RUN, &priv->flags))
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+ 0);
+ mutex_unlock(&mcast_mutex);
+
+ /*
+ * Defer carrier on work to workqueue to avoid a
+ * deadlock on rtnl_lock here.
+ */
+ if (mcast == priv->broadcast)
+ xve_queue_work(priv, XVE_WQ_START_MCASTON);
+
+ return 0;
+ }
+
+ if (mcast->logcount++ < 20) {
+ if (status == -ETIMEDOUT || status == -EAGAIN) {
+ xve_dbg_mcast(priv,
+ "%s multicast join failed for %pI6, status %d\n",
+ __func__, mcast->mcmember.mgid.raw,
+ status);
+ } else {
+ xve_warn(priv,
+ "%s multicast join failed for %pI6, status %d\n",
+ __func__, mcast->mcmember.mgid.raw, status);
+ }
+ }
+
+ mcast->backoff *= 2;
+ if (mcast->backoff > XVE_MAX_BACKOFF_SECONDS)
+ mcast->backoff = XVE_MAX_BACKOFF_SECONDS;
+
+ /* Clear the busy flag so we try again */
+ status = test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+
+ mutex_lock(&mcast_mutex);
+ spin_lock_irq(&priv->lock);
+ if (test_bit(XVE_MCAST_RUN, &priv->flags))
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+ mcast->backoff * HZ);
+ spin_unlock_irq(&priv->lock);
+ mutex_unlock(&mcast_mutex);
+
+ return status;
+}
+
+static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast,
+ int create)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_sa_mcmember_rec rec = {
+ .join_state = 1
+ };
+ ib_sa_comp_mask comp_mask;
+ int ret = 0;
+
+ rec.mgid = mcast->mcmember.mgid;
+ rec.port_gid = priv->local_gid;
+ rec.pkey = cpu_to_be16(priv->pkey);
+
+ comp_mask =
+ IB_SA_MCMEMBER_REC_MGID |
+ IB_SA_MCMEMBER_REC_PORT_GID |
+ IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+ if (create) {
+ comp_mask |=
+ IB_SA_MCMEMBER_REC_QKEY |
+ IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
+ IB_SA_MCMEMBER_REC_SL |
+ IB_SA_MCMEMBER_REC_FLOW_LABEL |
+ IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+ IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_HOP_LIMIT;
+
+ rec.qkey = 0x0;
+ rec.traffic_class = 0x0;
+ rec.sl = 0x0;
+ rec.flow_label = 0x0;
+ rec.hop_limit = 0x0;
+ /*
+ * Create the group with the module-configured rate
+ * (default: exactly 10 Gbps).
+ */
+ rec.rate_selector = rate_selector;
+ rec.rate = mcast_rate;
+ }
+
+ xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d\n",
+ mcast->mcmember.mgid.raw, rec.pkey, rec.qkey);
+ set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+ mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port,
+ &rec, comp_mask, GFP_KERNEL,
+ xve_mcast_join_complete, mcast);
+ if (IS_ERR(mcast->mc)) {
+ clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+ ret = PTR_ERR(mcast->mc);
+ xve_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+
+ mcast->backoff *= 2;
+ if (mcast->backoff > XVE_MAX_BACKOFF_SECONDS)
+ mcast->backoff = XVE_MAX_BACKOFF_SECONDS;
+
+ mutex_lock(&mcast_mutex);
+ if (test_bit(XVE_MCAST_RUN, &priv->flags))
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+ mcast->backoff * HZ);
+ mutex_unlock(&mcast_mutex);
+ }
+}
+
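+/*
+ * Join worker: refresh the local LID, create/join the broadcast group
+ * first, then join the remaining groups on multicast_list one per
+ * invocation, and finally derive mcast_mtu from the broadcast record.
+ */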
+void xve_mcast_join_task(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_MCASTJOIN, 2);
+ struct net_device *dev = priv->netdev;
+ struct ib_port_attr attr;
+
+ if (!test_bit(XVE_MCAST_RUN, &priv->flags))
+ return;
+
+ if (!ib_query_port(priv->ca, priv->port, &attr))
+ priv->local_lid = attr.lid;
+ else
+ xve_warn(priv, "ib_query_port failed\n");
+
+ priv->counters[XVE_MCAST_JOIN_TASK]++;
+
+ if (!priv->broadcast) {
+ struct xve_mcast *broadcast;
+
+ if (!test_bit(XVE_FLAG_ADMIN_UP, &priv->flags))
+ return;
+
+ broadcast = xve_mcast_alloc(dev, 1);
+ if (!broadcast) {
+ xve_warn(priv, "failed to allocate broadcast group\n");
+ mutex_lock(&mcast_mutex);
+ if (test_bit(XVE_MCAST_RUN, &priv->flags))
+ xve_queue_complete_work(priv,
+ XVE_WQ_START_MCASTJOIN,
+ HZ);
+ mutex_unlock(&mcast_mutex);
+ return;
+ }
+
+ spin_lock_irq(&priv->lock);
+ memcpy(broadcast->mcmember.mgid.raw, priv->bcast_mgid.raw,
+ sizeof(union ib_gid));
+ priv->broadcast = broadcast;
+ __xve_mcast_add(dev, priv->broadcast);
+ spin_unlock_irq(&priv->lock);
+ }
+
+ if (priv->broadcast &&
+ !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
+ if (!test_bit(XVE_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+ xve_mcast_join(dev, priv->broadcast, 1);
+ return;
+ }
+
+ while (1) {
+ struct xve_mcast *mcast = NULL;
+
+ spin_lock_irq(&priv->lock);
+ list_for_each_entry(mcast, &priv->multicast_list, list) {
+ if (!test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)
+ && !test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)
+ && !test_bit(XVE_MCAST_FLAG_ATTACHED,
+ &mcast->flags)) {
+ /* Found the next unjoined group */
+ break;
+ }
+ }
+ spin_unlock_irq(&priv->lock);
+
+ if (&mcast->list == &priv->multicast_list) {
+ /* All done */
+ break;
+ }
+
+ xve_mcast_join(dev, mcast, 1);
+ return;
+ }
+
+ spin_lock_irq(&priv->lock);
+ if (priv->broadcast)
+ priv->mcast_mtu =
+ XVE_UD_MTU(ib_mtu_enum_to_int
+ (priv->broadcast->mcmember.mtu));
+ else
+ priv->mcast_mtu = priv->admin_mtu;
+ spin_unlock_irq(&priv->lock);
+
+ if (!xve_cm_admin_enabled(dev)) {
+ pr_info("XVE: %s xve %s dev mtu %d, admin_mtu %d, mcast_mtu %d\n",
+ __func__, priv->xve_name, priv->netdev->mtu,
+ priv->admin_mtu, priv->mcast_mtu);
+ xve_dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+ }
+
+ xve_dbg_mcast(priv, "successfully joined all multicast groups\n");
+ clear_bit(XVE_MCAST_RUN, &priv->flags);
+}
+
+int xve_mcast_start_thread(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ /* Don't start mcast if the interface is not up */
+ if (!test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)
+ || !test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state))
+ return -ENOTCONN;
+
+ xve_dbg_mcast(priv, "%s Starting mcast thread for state[%ld ]\n",
+ __func__, priv->flags);
+
+ mutex_lock(&mcast_mutex);
+ if (!test_and_set_bit(XVE_MCAST_RUN, &priv->flags))
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN, 0);
+
+ if (!test_and_set_bit(XVE_MCAST_RUN_GC, &priv->flags))
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTLEAVE, 0);
+
+ mutex_unlock(&mcast_mutex);
+
+ return 0;
+}
+
+int xve_mcast_stop_thread(struct net_device *dev, int flush)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+
+ xve_dbg_mcast(priv, "stopping multicast thread\n");
+
+ mutex_lock(&mcast_mutex);
+ clear_bit(XVE_MCAST_RUN, &priv->flags);
+ clear_bit(XVE_MCAST_RUN_GC, &priv->flags);
+ cancel_delayed_work(&priv->mcast_join_task);
+ cancel_delayed_work(&priv->mcast_leave_task);
+ mutex_unlock(&mcast_mutex);
+
+ return 0;
+}
+
+static int xve_mcast_leave(struct net_device *dev, struct xve_mcast *mcast)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret = 0;
+
+ if (test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags))
+ ib_sa_free_multicast(mcast->mc);
+
+ if (test_and_clear_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+ xve_dbg_mcast(priv, "leaving MGID %pI6\n",
+ mcast->mcmember.mgid.raw);
+
+ /* Remove ourselves from the multicast group */
+ if (priv->qp) {
+ if (!test_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags))
+ ret =
+ ib_detach_mcast(priv->qp,
+ &mcast->mcmember.mgid,
+ be16_to_cpu(mcast->
+ mcmember.mlid));
+ }
+ if (ret)
+ xve_warn(priv, "ib_detach_mcast failed (result = %d)\n",
+ ret);
+ }
+
+ return 0;
+}
+
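+/*
+ * Multicast transmit path: look up the group by MGID, creating a
+ * send-only entry if needed; packets are queued on the group until
+ * the join completes and an address handle is available.
+ */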
+void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct xve_mcast *mcast;
+
+ if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags) ||
+ !priv->broadcast ||
+ !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
+ INC_TX_DROP_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ mcast = __xve_mcast_find(dev, mgid);
+ if (!mcast) {
+ /* Let's create a new send only group now */
+ xve_dbg_mcast(priv,
+ "setting up send only multicast group for %pI6\n",
+ mgid);
+
+ mcast = xve_mcast_alloc(dev, 0);
+ if (!mcast) {
+ xve_warn(priv, "unable to allocate memory for ");
+ xve_warn(priv, "multicast structure\n");
+ INC_TX_DROP_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ goto out;
+ }
+
+ set_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags);
+ memcpy(mcast->mcmember.mgid.raw, mgid, sizeof(union ib_gid));
+ __xve_mcast_add(dev, mcast);
+ list_add_tail(&mcast->list, &priv->multicast_list);
+ }
+
+ if (!mcast->ah) {
+ if (skb_queue_len(&mcast->pkt_queue) < XVE_MAX_MCAST_QUEUE)
+ skb_queue_tail(&mcast->pkt_queue, skb);
+ else {
+ INC_TX_DROP_STATS(priv, dev);
+ dev_kfree_skb_any(skb);
+ }
+
+ if (test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
+ xve_dbg_mcast(priv, "no address vector, ");
+ xve_dbg_mcast(priv, "but mcast join already started\n");
+ }
+ if (test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags))
+ xve_mcast_sendonly_join(mcast);
+ /*
+ * If lookup completes between here and out:, don't
+ * want to send packet twice.
+ */
+ mcast = NULL;
+ }
+
+out:
+ if (mcast && mcast->ah) {
+ xve_test("%s about to send mcast %02x%02x%02x%02x%02x%02x",
+ __func__, skb->data[0], skb->data[1], skb->data[2],
+ skb->data[3], skb->data[4], skb->data[5]);
+ xve_test("ah=%p proto=%02x%02x for %s\n", mcast->ah->ah,
+ skb->data[12], skb->data[13], dev->name);
+ xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+ }
+
+}
+
+void xve_mcast_carrier_on_task(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_MCASTON, 0);
+ struct ib_port_attr attr;
+
+ if (ib_query_port(priv->ca, priv->port, &attr) ||
+ attr.state != IB_PORT_ACTIVE) {
+ priv->counters[XVE_IB_PORT_NOT_ACTIVE]++;
+ xve_dbg_mcast(priv,
+ "%s Keeping carrier off until IB port is active\n",
+ __func__);
+ xve_put_ctx(priv);
+ return;
+ }
+
+ priv->counters[XVE_MCAST_CARRIER_TASK]++;
+ /*
+ * Take rtnl_lock to avoid racing with xve_stop() and
+ * turning the carrier back on while a device is being
+ * removed.
+ */
+ rtnl_lock();
+ if (!netif_carrier_ok(priv->netdev) && priv->broadcast &&
+ (test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags))) {
+ xve_dbg_mcast(priv, "XVE: %s Sending netif carrier on to %s\n",
+ __func__, priv->xve_name);
+ handle_carrier_state(priv, 1);
+ }
+ rtnl_unlock();
+ xve_put_ctx(priv);
+}
+
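+/*
+ * Flush: move every group (including broadcast) to a temporary list
+ * under the device lock, then leave and free them outside the lock.
+ */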
+void xve_mcast_dev_flush(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ LIST_HEAD(remove_list);
+ struct xve_mcast *mcast, *tmcast;
+ unsigned long flags;
+
+ xve_dbg_mcast(priv, "flushing multicast list\n");
+
+ spin_lock_irqsave(&priv->lock, flags);
+ list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
+ list_del(&mcast->list);
+ rb_erase(&mcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&mcast->list, &remove_list);
+ mcast->used = jiffies;
+ }
+
+ if (priv->broadcast) {
+ rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+ list_add_tail(&priv->broadcast->list, &remove_list);
+ priv->broadcast = NULL;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+ mcast->used = jiffies;
+ xve_mcast_leave(dev, mcast);
+ xve_mcast_free(mcast);
+ }
+
+}
+
+void xve_mcast_restart_task(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_MCASTRESTART, 0);
+ struct net_device *dev = priv->netdev;
+
+ xve_dbg_mcast(priv, "%s Restarting mcast thread for state[%ld ]\n",
+ __func__, priv->flags);
+ xve_mcast_stop_thread(dev, 0);
+ xve_mcast_start_thread(dev);
+ xve_put_ctx(priv);
+}
+
+void xve_mcast_leave_task(struct work_struct *work)
+{
+ struct xve_dev_priv *priv =
+ xve_get_wqctx(work, XVE_WQ_FINISH_MCASTLEAVE, 2);
+ struct net_device *dev = priv->netdev;
+ struct xve_mcast *mcast, *tmcast;
+ LIST_HEAD(remove_list);
+
+ if (!test_bit(XVE_MCAST_RUN_GC, &priv->flags))
+ return;
+
+ priv->counters[XVE_MCAST_LEAVE_TASK]++;
+
+ if (xve_mc_sendonly_timeout > 0) {
+ list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list,
+ list) {
+ if (test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)
+ && time_before(mcast->used,
+ jiffies -
+ xve_mc_sendonly_timeout * HZ)) {
+ rb_erase(&mcast->rb_node,
+ &priv->multicast_tree);
+ list_move_tail(&mcast->list, &remove_list);
+ }
+ }
+
+ list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+ xve_mcast_leave(dev, mcast);
+ xve_mcast_free(mcast);
+ }
+ }
+
+ xve_queue_complete_work(priv, XVE_WQ_START_MCASTLEAVE, 60 * HZ);
+
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int xs_seq_file;
+module_param(xs_seq_file, int, 0644);
+
+MODULE_PARM_DESC(xs_seq_file,
+ "Enable the seq_file interface for printing large data through /proc entries");
+
+static char *glob_counter_name[XVE_MAX_GLOB_COUNTERS] = {
+ "sync end del count:\t\t",
+ "vnic install count:\t\t",
+ "vnic del count:\t\t\t",
+ "vnic del novid count:\t\t",
+ "vnic update count:\t\t",
+ "vnic sync begin count:\t\t",
+ "vnic sync end count:\t\t",
+ "vnic oper req count:\t\t",
+ "vnic unsup cmd count:\t\t",
+ "iscsi info count:\t\t",
+ "xscore device remove count:\t",
+ "vnic stats req count:\t",
+ "number of pages allocated:\t",
+};
+
+static char *counter_name[XVE_MAX_COUNTERS] = {
+ "heartbeat_count:\t\t",
+ "hbeat send error count:\t\t",
+ "state_machine count:\t\t",
+ "state_machine_up count:\t\t",
+ "state_machine_down count:\t",
+ "napi_poll_count:\t\t",
+ "short_tx_pkt_count:\t\t",
+ "tx_skb_count:\t\t\t",
+ "tx skb free count:\t\t",
+ "tx vlan count:\t\t\t",
+ "tx error count:\t\t\t",
+ "tx wrb exhaust:\t\t\t",
+ "tx drop oper down count:\t",
+ "tx drop skb error count:\t",
+ "tx drop ring full count:\t",
+ "tx wake up count\t\t",
+ "tx queue stop count:\t\t",
+ "rx_skb_count:\t\t\t",
+ "rx_skb_alloc_count:\t\t",
+ "rx_smallskb_alloc_count:\t",
+ "rx_skb_freed_count:\t\t",
+ "rx skb offload count:\t\t",
+ "rx skb offl frag count:\t\t",
+ "rx skb offlnonipv4 count:\t",
+ "rx error count:\t\t\t",
+ "rx quota exceeded count:\t",
+ "rx no buf count:\t\t",
+ "napi sched count:\t\t",
+ "napi notsched count:\t\t",
+ "napi resched count:\t\t",
+ "open count:\t\t\t",
+ "stop count:\t\t\t",
+ "getstats count:\t\t\t",
+ "set mcast count:\t\t",
+ "vlan add count:\t\t\t",
+ "vlan del count:\t\t\t",
+ "ioctl count:\t\t\t",
+ "wdog timeout count:\t\t",
+ "oper req count:\t\t\t",
+ "admin up count:\t\t\t",
+ "admin down count:\t\t",
+ "sm poll count:\t\t\t",
+ "qp error count:\t\t\t",
+ "IB recovery count:\t\t",
+ "IB recovered count:\t\t",
+ "IB link down count:\t\t",
+ "IB link up count:\t\t",
+ "IB HCA port not active:\t\t",
+ "sent oper up count:\t\t",
+ "sent oper down count:\t\t",
+ "sent oper state failure count:\t",
+ "sent oper state success count:\t",
+ "drop standby count:\t\t",
+ "mac learn count:\t\t",
+ "mac aged count:\t\t\t",
+ "mac aged check count:\t\t",
+ "mac aged match not found:\t",
+ "mac aged still in use:\t\t",
+ "mac moved count:\t\t",
+ "mcast join task count:\t\t",
+ "mcast leave task count:\t\t",
+ "mcast carrier task count:\t",
+ "tx ud count:\t\t\t",
+ "tx rc count:\t\t\t",
+ "tx mcast count:\t\t\t",
+ "tx arp count:\t\t\t",
+ "tx ndp count:\t\t\t",
+ "tx arp vlan count:\t\t",
+ "tx ndp vlan count:\t\t",
+ "tx ud flood count:\t\t",
+ "tx rc flood count:\t\t",
+ "tx queue count:\t\t\t",
+ "tx path not found:\t\t",
+ "rx path not setup:\t\t",
+ "tx ah not found:\t\t",
+ "pathrec query count:\t\t",
+ "pathrec resp count:\t\t",
+ "pathrec resp err count:\t\t",
+ "ib sm_change count:\t\t",
+ "ib client_reregister count:\t",
+ "ib port_err count:\t\t",
+ "ib port_active count:\t\t",
+ "ib lid_active count:\t\t",
+ "ib pkey_change count:\t\t",
+ "ib invalid count:\t\t",
+};
+
+static char *misc_counter_name[XVE_MISC_MAX_COUNTERS] = {
+ "start pkey poll:\t\t",
+ "complete pkey poll:\t\t",
+ "start ah reap:\t\t\t",
+ "complete reap:\t\t\t",
+ "start fwt_aging:\t\t",
+ "complete fwt_aging:\t\t",
+ "start mcast join:\t\t",
+ "complete mcast join\t\t",
+ "start mcast leave:\t\t",
+ "complete mcast leave:\t\t",
+ "start mcast on:\t\t\t",
+ "complete mcast on:\t\t",
+ "start mcast restart:\t\t",
+ "complete mcast restart:\t\t",
+ "start flush light:\t\t",
+ "complete flush light:\t\t",
+ "start flush normal:\t\t",
+ "complete flush normal:\t\t",
+ "start flush heavy:\t\t",
+ "complete flush heavy:\t\t",
+ "start cm stale:\t\t",
+ "complete cm stale:\t\t",
+ "start cm tx start:\t\t",
+ "complete cm work start:\t\t",
+ "start cm tx reap:\t\t",
+ "complete cm work tx reap:\t",
+ "start cm rx reap:\t\t",
+ "complete cm work rx reap:\t",
+ "Workqueue not scheded:\t\t",
+ "Workqueue sched invalid:\t",
+ "WorkQueue sched failed:\t\t",
+};
+
+#define atoi(str) simple_strtoul(((str) != NULL ? (str) : ""), NULL, 10)
+#define XS_RESCHED_NAPI "napi_sched"
+#define XS_READIB_BUF "read_ibbuf"
+#define XS_RXBATCHING_ON "rbatch on"
+#define XS_RXBATCHING_OFF "rbatch off"
+
+struct proc_dir_entry *proc_root_xve;
+struct proc_dir_entry *proc_root_xve_dev;
+
+static int xve_proc_open_device(struct inode *inode, struct file *file);
+static int xve_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_device(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static int xve_proc_open_debug(struct inode *inode, struct file *file);
+static int xve_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+static void *xve_seq_start(struct seq_file *seq, loff_t *pos);
+static void *xve_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+static int xve_seq_show(struct seq_file *seq, void *v);
+static void xve_seq_stop(struct seq_file *seq, void *v);
+static int xve_seq_open(struct inode *inode, struct file *sfile);
+static int xve_proc_open_device_counters(struct inode *inode,
+ struct file *file);
+static int xve_proc_read_device_counters(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_device_counters(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xve_proc_l2_open_device(struct inode *inode, struct file *file);
+static int xve_proc_l2_read_device(struct seq_file *m, void *data);
+static ssize_t xve_proc_l2_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp);
+static int xve_proc_open_l2_flush(struct inode *inode, struct file *file);
+static int xve_proc_read_l2_flush(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_l2_flush(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp);
+
+static const struct file_operations xve_debug_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_proc_open_debug,
+ .read = seq_read,
+ .write = xve_proc_write_debug,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xve_device_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_proc_open_device,
+ .read = seq_read,
+ .write = xve_proc_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xve_device_counters_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_proc_open_device_counters,
+ .read = seq_read,
+ .write = xve_proc_write_device_counters,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
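+/*
+ * Used when xs_seq_file is set: counters are emitted through the
+ * seq_file iterator below instead of the single_open variant, which
+ * helps with large dumps.
+ */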
+static const struct file_operations xve_file_ops = {
+ .owner = THIS_MODULE,
+ .open = xve_seq_open,
+ .read = seq_read,
+ .write = xve_proc_write_device_counters,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static const struct seq_operations xve_seq_ops = {
+ .start = xve_seq_start,
+ .next = xve_seq_next,
+ .stop = xve_seq_stop,
+ .show = xve_seq_show
+};
+
+static const struct file_operations xve_l2_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_proc_l2_open_device,
+ .read = seq_read,
+ .write = xve_proc_l2_write_device,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations xve_l2_flush_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = xve_proc_open_l2_flush,
+ .read = seq_read,
+ .write = xve_proc_write_l2_flush,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static int xve_proc_l2_read_device(struct seq_file *m, void *data)
+{
+ struct xve_fwt_entry *fwt_entry;
+ struct xve_dev_priv *vp = m->private;
+ struct xve_fwt_s *xve_fwt;
+ struct hlist_head *head;
+ struct hlist_node *n;
+ int i, j, k;
+ char tmp_buf[512];
+ char *smac;
+
+ xve_fwt = &vp->xve_fwt;
+ seq_printf(m,
+ "Id\tVLAN\tHash\tMAC\t\t\tGUID\t\t\tCMState\t\tQP\tVersion\t\tTx Mb/s\tRx Mb/s\n");
+ seq_printf(m,
+ "=======================================================");
+ seq_puts(m, "==========================");
+ seq_puts(m, "====================================================\n");
+
+ for (i = vp->sindex, j = vp->jindex; i < XVE_FWT_HASH_LISTS; i++) {
+ head = &xve_fwt->fwt[i];
+ k = 0;
+ hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+ if (xve_fwt_entry_valid(xve_fwt, fwt_entry) == true) {
+ char *cmstr = "Not Connected";
+ u16 printed = 0;
+ struct xve_cm_ctx *tx = NULL, *rx = NULL;
+
+ j++;
+ k++;
+ smac = fwt_entry->smac_addr;
+ tmp_buf[0] = 0;
+ print_mgid_buf(tmp_buf,
+ (char *)(fwt_entry->dgid.raw));
+ if (fwt_entry->path) {
+ tx = xve_cmtx_get(fwt_entry->path);
+ rx = xve_cmrx_get(fwt_entry->path);
+ if (tx) {
+ u32 rx_rate = 0;
+
+ if (test_bit
+ (XVE_FLAG_OPER_UP,
+ &tx->flags))
+ cmstr = "Connected";
+ if (rx)
+ rx_rate =
+ rx->stats.rx_rate;
+ seq_printf(m,
+ "%d\t%d\t%d\t%2x:%2x:%2x:%2x:%2x:%2x\t%s\t%s\t%x\t%s\t%d\t%d\n",
+ j, fwt_entry->vlan,
+ fwt_entry->
+ hash_value,
+ ALIGN_TO_FF(smac[0]),
+ ALIGN_TO_FF(smac[1]),
+ ALIGN_TO_FF(smac[2]),
+ ALIGN_TO_FF(smac[3]),
+ ALIGN_TO_FF(smac[4]),
+ ALIGN_TO_FF(smac[5]),
+ tmp_buf + 8, cmstr,
+ tx->qp ? tx->qp->
+ qp_num : 0,
+ tx->version,
+ tx->stats.tx_rate,
+ rx_rate);
+ printed = 1;
+ }
+ }
+
+ if (!printed) {
+ char buffer[512];
+
+ buffer[0] = 0;
+ sprintf(buffer,
+ "NC Path-%s CM(Tx-%s Rx-%s) ",
+ (fwt_entry->path !=
+ NULL) ? "Yes" : "No",
+ (tx != NULL) ? "Yes" : "No",
+ (rx != NULL) ? "Yes" : "No");
+ seq_printf(m,
+ "%d\t%d\t%d\t%2x:%2x:%2x:%2x:%2x:%2x\t%s\t%s\n",
+ j, fwt_entry->vlan,
+ fwt_entry->hash_value,
+ ALIGN_TO_FF(smac[0]),
+ ALIGN_TO_FF(smac[1]),
+ ALIGN_TO_FF(smac[2]),
+ ALIGN_TO_FF(smac[3]),
+ ALIGN_TO_FF(smac[4]),
+ ALIGN_TO_FF(smac[5]),
+ tmp_buf + 8, buffer);
+ }
+ xve_fwt_put_ctx(&vp->xve_fwt, fwt_entry);
+ }
+ }
+
+ }
+
+ if (i >= XVE_FWT_HASH_LISTS) {
+ vp->sindex = 0;
+ vp->jindex = 0;
+ seq_puts(m, "\n End of L2 Table\n");
+ } else {
+ seq_puts(m, "\n Table incomplete\n");
+ vp->sindex = i;
+ vp->jindex = j;
+ }
+ return 0;
+}
+
+static ssize_t xve_proc_l2_write_device(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ return count;
+}
+
+static int xve_proc_l2_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, xve_proc_l2_read_device, PDE_DATA(inode));
+}
+
+static int xve_proc_read_device(struct seq_file *m, void *data)
+{
+ struct xve_dev_priv *vp = m->private;
+ int i;
+ unsigned long tsecs = 0, tmins = 0, thrs = 0;
+ char tmp_buf[512];
+ char *bcast_mgid_token = vp->bcast_mgid.raw;
+ char *local_gid_token = vp->local_gid.raw;
+
+ if (xve_get_misc_info()) {
+ if (vp->next_page) {
+ for (i = 0; i < XVE_MISC_MAX_COUNTERS; i++)
+ seq_printf(m, "%s%u\n", misc_counter_name[i],
+ vp->misc_counters[i]);
+ vp->next_page = 0;
+ goto out;
+ }
+ }
+
+ seq_printf(m, "Chassis Name:\t\t\t%s\n", vp->xsmp_info.chassis_name);
+ seq_printf(m, "Chassis Version :\t\t%x\n", vp->xsmp_info.version);
+ seq_printf(m, "Server-Profile:\t\t\t%s\n", vp->xsmp_info.session_name);
+ seq_puts(m, "Config parameters:\n");
+ seq_printf(m, "Mode :\t\t\t\t%s\n", vp->mode);
+ seq_printf(m, "Netid :\t\t\t\t0x%x\n", vp->net_id);
+ if (vp->qp)
+ seq_printf(m, "UD Queue pair Number(QP): \t%d\n",
+ (vp->qp->qp_num));
+ else
+ seq_printf(m,
+ "UD Queue pair Number(QP) Not established yet \t\t\n");
+
+ seq_printf(m, "PortDetails:\t\t\tPort:%d pkey:%d pkey_index:%d\n",
+ vp->port, vp->pkey, vp->pkey_index);
+
+ tmp_buf[0] = 0;
+ print_mgid_buf(tmp_buf, bcast_mgid_token);
+ seq_printf(m, "Bcast Mgid:\t\t\t%s\n", tmp_buf);
+
+ tmp_buf[0] = 0;
+ print_mgid_buf(tmp_buf, local_gid_token);
+
+ seq_printf(m, "Local gid:\t\t\t%s\n", tmp_buf);
+ seq_printf(m, "MAC addr:\t\t\t0x%Lx\n", vp->mac);
+ seq_printf(m, "VID:\t\t\t\t0x%Lx\n", vp->resource_id);
+ seq_printf(m, "mtu:\t\t\t\t%d\n", vp->netdev->mtu);
+ seq_printf(m, "Admin mtu:\t\t\t%d\n", vp->admin_mtu);
+ seq_printf(m, "MCAST mtu:\t\t\t%d\n", vp->mcast_mtu);
+ seq_printf(m, "IB MAX MTU: \t\t\t%d\n", vp->max_ib_mtu);
+
+ seq_printf(m, "Receive Queue size: \t\t%d\n", xve_recvq_size);
+ seq_printf(m, "Transmit Queue size: \t\t%d\n", xve_sendq_size);
+
+ if (vp->cm_supported) {
+ seq_printf(m, "Num of cm frags: \t\t%d\n", vp->cm.num_frags);
+ seq_printf(m, "CM mtu \t\t\t%d\n", vp->cm.max_cm_mtu);
+ }
+
+ seq_puts(m, "\n");
+ seq_printf(m, "link/xsmp hndl:\t\t\t%p\n", vp->xsmp_hndl);
+ seq_printf(m, "Port link state: \t\t%s\n",
+ test_bit(XVE_PORT_LINK_UP, &vp->state) ? "Up" : "Down");
+
+ if (vp->broadcast) {
+ seq_puts(m, "Multicast Report:\n");
+ seq_printf(m, "Flag: \t\t%lx\n",
+ vp->broadcast->flags);
+ seq_printf(m, "join state: \t\t%s\n",
+ test_bit(XVE_MCAST_FLAG_ATTACHED,
+ &vp->broadcast->
+ flags) ? "Joined" : "Not joined");
+ } else {
+ seq_puts(m, "Multicast Not created:\n");
+ }
+
+ strcpy(tmp_buf, "None");
+ if (vp->mp_flag & MP_XVE_PRIMARY) {
+ strcpy(tmp_buf, "Primary");
+ if (vp->mp_flag & MP_XVE_AUTO_SWITCH)
+ strcat(tmp_buf, " + AutoSwitchover");
+ } else if (vp->mp_flag & MP_XVE_SECONDARY) {
+ strcpy(tmp_buf, "Secondary");
+ if (vp->mp_flag & MP_XVE_AUTO_SWITCH)
+ strcat(tmp_buf, " + AutoSwitchover");
+ }
+
+ seq_printf(m, "HA flags:\t\t\t%s\n", tmp_buf);
+ seq_printf(m, "TSO:\t\t\t\t%s\n",
+ (vp->netdev->
+ features & NETIF_F_TSO) ? "Enabled" : "Disabled");
+ seq_printf(m, "LRO:\t\t\t\t%s\n",
+ (vp->netdev->
+ features & NETIF_F_LRO) ? "Enabled" : "Disabled");
+
+ if (test_bit(XVE_OPER_REP_SENT, &vp->state)) {
+
+ tsecs = jiffies_to_msecs(jiffies - vp->jiffies) / 1000;
+ thrs = tsecs / (60 * 60);
+ tmins = (tsecs / 60 - (thrs * 60));
+ tsecs = tsecs - (tmins * 60) - (thrs * 60 * 60);
+ }
+
+ seq_printf(m, "XVE Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+ thrs, tmins, tsecs);
+ seq_puts(m, "\n");
+
+ seq_printf(m, "Netdev state:\t\t\t0x%lu\n", vp->netdev->state);
+ seq_printf(m, "Netdev napi state:\t\t0x%lu\n", vp->napi.state);
+ seq_printf(m, "VNIC state:\t\t\t0x%x\n", (unsigned int)vp->state);
+ seq_printf(m, "VNIC Flag:\t\t\t0x%x\n", (unsigned int)vp->flags);
+
+ tmp_buf[0] = 0;
+ if (netif_running(vp->netdev))
+ strcat(tmp_buf, "dev running");
+ else
+ strcat(tmp_buf, "netif not running");
+ if (netif_queue_stopped(vp->netdev))
+ strcat(tmp_buf, " + dev stopped");
+ else
+ strcat(tmp_buf, " + dev not stopped");
+
+ seq_printf(m, "%s\n\n", tmp_buf);
+
+ seq_printf(m, "Carrier state:\t\t\t%s\n",
+ netif_carrier_ok(vp->netdev) ? "Up" : "Down");
+
+ seq_printf(m, "VNIC up:\t\t\t%s\n",
+ test_bit(XVE_OPER_UP, &vp->state) ? "Yes" : "No");
+
+ tmp_buf[0] = 0;
+ if (test_bit(XVE_OPER_UP, &vp->state))
+ strcat(tmp_buf, "Oper Up");
+ else
+ strcat(tmp_buf, "Oper Down");
+ if (test_bit(XVE_OS_ADMIN_UP, &vp->state))
+ strcat(tmp_buf, " + OS Admin Up");
+ else
+ strcat(tmp_buf, " + OS Admin Down");
+ if (test_bit(XVE_PORT_LINK_UP, &vp->state))
+ strcat(tmp_buf, " + Port Link Up");
+ else
+ strcat(tmp_buf, " + Port Link Down");
+ if (test_bit(XVE_OPER_REP_SENT, &vp->state))
+ strcat(tmp_buf, " + Oper Sent");
+ else
+ strcat(tmp_buf, " + No Oper Rep");
+
+ if (test_bit(XVE_INTR_ENABLED, &vp->state))
+ strcat(tmp_buf, " + Rx Intr Enabled");
+ else
+ strcat(tmp_buf, " + Rx Intr Disabled");
+
+ if (test_bit(XVE_RX_NOBUF, &vp->state))
+ strcat(tmp_buf, " + Rx No Buf");
+
+ if (test_bit(XVE_IBLINK_DOWN, &vp->state))
+ strcat(tmp_buf, " + IB Link Down");
+ else
+ strcat(tmp_buf, " + IB Link Up");
+
+ if (test_bit(XVE_IB_DEV_OPEN, &vp->flags))
+ strcat(tmp_buf, " + IB Device Opened");
+ else
+ strcat(tmp_buf, " + IB Device Not Opened");
+
+ if (test_bit(XVE_OVER_QUOTA, &vp->state))
+ strcat(tmp_buf, " + No RX Quota");
+
+ seq_printf(m, "%s\n\n", tmp_buf);
+
+ if (vp->work_queue_failed != 0)
+ seq_printf(m, "WQ Failed:\t\t\t%ld\n", vp->work_queue_failed);
+
+ seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+ vp->next_page = 1;
+out:
+ return 0;
+}
+
+static ssize_t xve_proc_write_device(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
+ char action[64];
+
+ if (count == 0 || count >= sizeof(action))
+ return -EINVAL;
+ /* The buffer lives in user space; copy it in before parsing */
+ if (copy_from_user(action, buffer, count))
+ return -EFAULT;
+ action[count] = '\0';
+ if (action[count - 1] == '\n')
+ action[count - 1] = '\0';
+
+ if ((strlen(action) == 1) && (atoi(action) == 0)) {
+ /* Clear counters */
+ memset(vp->counters, 0, sizeof(vp->counters));
+ vp->counters_cleared++;
+ return count;
+ }
+
+ /* Full-string compare so commands with spaces ("rbatch on") work */
+ if (strcmp(action, XS_RESCHED_NAPI) == 0)
+ set_bit(XVE_TRIGGER_NAPI_SCHED, &vp->state);
+
+ return count;
+}
+
+static int xve_proc_open_device(struct inode *inode, struct file *file)
+{
+ return single_open(file, xve_proc_read_device, PDE_DATA(inode));
+}
+
+static int xve_proc_read_device_counters(struct seq_file *m, void *data)
+{
+ struct xve_dev_priv *vp = (struct xve_dev_priv *)m->private;
+ int i;
+
+ for (i = 0; i < XVE_MAX_COUNTERS; i++)
+ seq_printf(m, "%s%u\n", counter_name[i], vp->counters[i]);
+ seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+ return 0;
+}
+
+static ssize_t xve_proc_write_device_counters(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *offp)
+{
+ struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
+ int newval, ret;
+ char *buf;
+
+ if (count == 0 || count >= PAGE_SIZE)
+ return -EINVAL;
+
+ buf = (char *)__get_free_page(GFP_USER);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user(buf, buffer, count)) {
+ ret = -EFAULT;
+ goto out;
+ }
+ buf[count] = '\0';
+
+ ret = kstrtoint(buf, 0, &newval);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (newval == 0) {
+ /* Clear counters */
+ memset(vp->counters, 0, sizeof(vp->counters));
+ vp->counters_cleared++;
+ }
+ ret = count;
+out:
+ /* Always release the temporary page */
+ free_page((unsigned long)buf);
+ return ret;
+}
+
+static int xve_proc_open_device_counters(struct inode *inode, struct file *file)
+{
+ return single_open(file, xve_proc_read_device_counters,
+ PDE_DATA(inode));
+}
+
+static int xve_proc_read_l2_flush(struct seq_file *m, void *data)
+{
+ seq_puts(m, "flush: Nothing to read\n");
+ return 0;
+}
+
+static ssize_t xve_proc_write_l2_flush(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ struct xve_dev_priv *priv = PDE_DATA(file_inode(file));
+
+ pr_info("%s XVE flushing l2 %s\n", __func__, priv->xve_name);
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+
+ return count;
+}
+
+static int xve_proc_open_l2_flush(struct inode *inode, struct file *file)
+{
+ return single_open(file, xve_proc_read_l2_flush, PDE_DATA(inode));
+}
+
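+/*
+ * seq_file iterator over the per-device counters: each step prints
+ * one entry of counter_name[] with its current value.
+ */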
+static void *xve_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ return (*pos < XVE_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static void *xve_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ return (*pos < XVE_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static int xve_seq_show(struct seq_file *seq, void *v)
+{
+ struct xve_dev_priv *vp = seq->private;
+
+ if (vp->ix == XVE_MAX_COUNTERS)
+ vp->ix = 0;
+
+ seq_printf(seq, "%s %u\n", counter_name[vp->ix], vp->counters[vp->ix]);
+ vp->ix++;
+
+ return 0;
+}
+
+static void xve_seq_stop(struct seq_file *seq, void *v)
+{
+ /* Nothing to be done here */
+}
+
+static int xve_seq_open(struct inode *inode, struct file *sfile)
+{
+ struct seq_file *seq;
+ int ret_val;
+
+ ret_val = seq_open(sfile, &xve_seq_ops);
+ if (!ret_val) {
+ /* recover the pointer buried in proc_dir_entry data */
+ seq = sfile->private_data;
+ seq->private = PDE_DATA(inode);
+ }
+
+ return ret_val;
+};
+
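+/*
+ * Create the per-device proc tree under /proc/driver/xve/devices/<dev>/:
+ * a device info file named after the interface plus "counters",
+ * "l2table" and "flush_l2" control files (e.g. writing 0 to the
+ * "counters" file clears the per-device counters).
+ */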
+int xve_add_proc_entry(struct xve_dev_priv *vp)
+{
+ struct proc_dir_entry *file, *l2, *flush, *counter;
+ int ret = 0;
+
+ vp->nic_dir = xg_create_proc_entry(vp->proc_name, S_IFDIR,
+ proc_root_xve_dev, 1);
+
+ if (!vp->nic_dir) {
+ pr_info("Unable to create the xve nicentry\n");
+ return -ENOMEM;
+ }
+ file = proc_create_data(vp->xve_name, S_IFREG, vp->nic_dir,
+ &xve_device_proc_fops, vp);
+ if (!file) {
+ pr_info("Unable to create the xve /proc entry\n");
+ ret = -ENOMEM;
+ goto err_dev_entry;
+ }
+ if (xs_seq_file) {
+ /* Using proc seq_file for OVM */
+ counter = proc_create_data("counters", S_IFREG, vp->nic_dir,
+ &xve_file_ops, vp);
+ } else
+ counter = proc_create_data("counters", S_IFREG, vp->nic_dir,
+ &xve_device_counters_proc_fops, vp);
+ if (!counter) {
+ pr_info("Unable to create the xve /proc counters entry\n");
+ ret = -ENOMEM;
+ goto err_counter;
+ }
+
+ l2 = proc_create_data("l2table", S_IFREG, vp->nic_dir,
+ &xve_l2_proc_fops, vp);
+ if (!l2) {
+ pr_info("Unable to create the xve /proc l2 entry\n");
+ ret = -ENOMEM;
+ goto err_l2table;
+ }
+ /*
+ * Create flush entry
+ */
+ flush = proc_create_data("flush_l2", S_IFREG, vp->nic_dir,
+ &xve_l2_flush_proc_fops, vp);
+ if (!flush) {
+ pr_info("Unable to create the xve /proc flush entry\n");
+ ret = -ENOMEM;
+ goto err_flush;
+ }
+ return 0;
+err_flush:
+ remove_proc_entry("l2table", vp->nic_dir);
+err_l2table:
+ remove_proc_entry("counters", vp->nic_dir);
+err_counter:
+ remove_proc_entry(vp->xve_name, vp->nic_dir);
+err_dev_entry:
+ remove_proc_entry(vp->proc_name, proc_root_xve_dev);
+ return ret;
+}
+
+void xve_remove_proc_entry(struct xve_dev_priv *vp)
+{
+ remove_proc_entry("counters", vp->nic_dir);
+ remove_proc_entry("flush_l2", vp->nic_dir);
+ remove_proc_entry("l2table", vp->nic_dir);
+ remove_proc_entry(vp->xve_name, vp->nic_dir);
+ remove_proc_entry(vp->proc_name, proc_root_xve_dev);
+}
+
+static ssize_t xve_proc_write_debug(struct file *file,
+ const char __user *buffer, size_t count,
+ loff_t *offp)
+{
+ int newval;
+ ssize_t ret;
+ char *buf = (char *) __get_free_page(GFP_USER);
+
+ if (!buf)
+ return -ENOMEM;
+
+ ret = -EFAULT;
+ if (count >= PAGE_SIZE || copy_from_user(buf, buffer, count))
+ goto out;
+ buf[count] = '\0';
+
+ ret = kstrtoint(buf, 0, &newval);
+ if (ret != 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xve_debug_level = newval;
+ ret = count;
+out:
+ free_page((unsigned long)buf);
+ return ret;
+}
+
+static int xve_proc_read_debug(struct seq_file *m, void *data)
+{
+ int i;
+
+ seq_printf(m, "Debug bitmask: 0x%x\n\n", xve_debug_level);
+ for (i = 0; i < XVE_MAX_GLOB_COUNTERS; i++)
+ seq_printf(m, "%s%d\n", glob_counter_name[i], xve_counters[i]);
+ return 0;
+}
+
+static int xve_proc_open_debug(struct inode *inode, struct file *file)
+{
+ return single_open(file, xve_proc_read_debug, PDE_DATA(inode));
+}
+
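+/*
+ * Create the driver-wide proc entries: /proc/driver/xve, the "devices"
+ * directory and the global "debug" control file.
+ */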
+int xve_create_procfs_root_entries(void)
+{
+ struct proc_dir_entry *debug_file;
+ int ret = 0;
+
+ proc_root_xve =
+ xg_create_proc_entry("driver/xve", S_IFDIR, NULL, 0);
+
+ if (!proc_root_xve) {
+ pr_info("Unable to create /proc/driver/xve\n");
+ return -ENOMEM;
+ }
+
+ proc_root_xve_dev = xg_create_proc_entry("devices", S_IFDIR,
+ proc_root_xve, 1);
+ if (!proc_root_xve_dev) {
+ pr_info("Unable to create /proc/driver/xve/devices\n");
+ ret = -ENOMEM;
+ goto create_proc_end_1;
+ }
+ debug_file = proc_create_data("debug", S_IFREG, proc_root_xve,
+ &xve_debug_proc_fops, NULL);
+ if (!debug_file) {
+ pr_info("Unable to create /proc/driver/xve/debug\n");
+ ret = -ENOMEM;
+ goto create_proc_end_2;
+ }
+ return 0;
+
+create_proc_end_2:
+ remove_proc_entry("devices", proc_root_xve_dev);
+create_proc_end_1:
+ remove_proc_entry("driver/xve", NULL);
+ return ret;
+}
+
+void xve_remove_procfs_root_entries(void)
+{
+ remove_proc_entry("debug", proc_root_xve);
+ remove_proc_entry("devices", proc_root_xve);
+ xg_remove_proc_entry("driver/xve", NULL);
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#include <linux/pkt_sched.h>
+#include <linux/random.h>
+
+static int xve_age_path = 1;
+module_param(xve_age_path, int, 0644);
+MODULE_PARM_DESC(xve_age_path, "Age path enable/disable if no fwt entries");
+
+u32 xve_hash_salt __read_mostly;
+static struct kmem_cache *xve_fwt_cache __read_mostly;
+
+struct xve_advert_hdr {
+ __be16 type;
+ __be16 count;
+ union ib_gid gid;
+ u32 qpn;
+} __attribute__ ((__packed__));
+
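+/*
+ * Allocate the slab cache used for forwarding-table entries and
+ * randomize the hash salt.
+ */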
+int xve_tables_init(void)
+{
+ get_random_bytes(&xve_hash_salt, sizeof(xve_hash_salt));
+ xve_fwt_cache =
+ kmem_cache_create("xve_fwt_cache", sizeof(struct xve_fwt_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!xve_fwt_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void xve_fwt_init(struct xve_fwt_s *xve_fwt)
+{
+ int i;
+
+ spin_lock_init(&xve_fwt->lock);
+ for (i = 0; i < XVE_FWT_HASH_LISTS; i++)
+ INIT_HLIST_HEAD(&xve_fwt->fwt[i]);
+ xve_fwt->num = 0;
+}
+
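+/*
+ * Hash a (MAC, VLAN) pair into one of the forwarding-table buckets.
+ */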
+static int xve_mac_hash(const unsigned char *mac, int size, u16 vlan)
+{
+ return hash_bytes(mac, ETH_ALEN, vlan ^ xve_hash_salt) & (size - 1);
+}
+
+static struct xve_fwt_entry *xve_fwt_find_entry(struct hlist_head *head,
+ const unsigned char *mac,
+ u16 vlan)
+{
+ struct xve_fwt_entry *fwt_entry;
+
+ hlist_for_each_entry(fwt_entry, head, hlist) {
+ if (fwt_entry->vlan == vlan
+ && ether_addr_equal(fwt_entry->smac_addr, mac))
+ return fwt_entry;
+ }
+ return NULL;
+}
+
+static struct xve_fwt_entry *xve_fwt_find_valid(struct hlist_head *head)
+{
+ struct xve_fwt_entry *fwt_entry;
+
+ hlist_for_each_entry(fwt_entry, head, hlist) {
+ if (test_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state))
+ return fwt_entry;
+ }
+ return NULL;
+}
+
+struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val)
+{
+ struct hlist_head *head;
+ struct xve_fwt_entry *fwt_entry = NULL;
+ unsigned long flags = 0;
+
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ head = &xve_fwt->fwt[val];
+ if (head != NULL)
+ fwt_entry = xve_fwt_find_valid(head);
+ if (fwt_entry)
+ atomic_inc(&fwt_entry->ref_cnt);
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+ return fwt_entry;
+}
+
+bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
+ struct xve_fwt_entry *fwt_entry)
+{
+ bool ret = true;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ if ((fwt_entry != NULL)
+ && test_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state))
+ atomic_inc(&fwt_entry->ref_cnt);
+ else
+ ret = false;
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+
+ return ret;
+}
+
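+/*
+ * Walk every forwarding-table bucket and age out entries that have not
+ * been refreshed within aging_delay. When the last entry on a path is
+ * aged out and xve_age_path is set, the path itself is flushed as well.
+ */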
+int xve_aging_task_machine(struct xve_dev_priv *priv)
+{
+ unsigned long flags;
+ struct xve_fwt_entry *fwt_entry;
+ struct xve_path *path;
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ int i;
+ char *smac;
+ union ib_gid dgid;
+ int is_list_empty = 0;
+ struct hlist_head *head;
+ struct hlist_node *n;
+
+ spin_lock_irqsave(&priv->lock, flags);
+ if (!test_bit(XVE_OS_ADMIN_UP, &priv->state) ||
+ test_bit(XVE_DELETING, &priv->state)) {
+ spin_unlock_irqrestore(&priv->lock, flags);
+ return 0;
+ }
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ priv->counters[XVE_MAC_AGED_CHECK]++;
+ for (i = 0; i < XVE_FWT_HASH_LISTS; i++) {
+ head = &xve_fwt->fwt[i];
+ hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+ if (xve_fwt_entry_valid(xve_fwt, fwt_entry) == true) {
+ smac = fwt_entry->smac_addr;
+ if (!test_and_clear_bit
+ (XVE_FWT_ENTRY_REFRESH, &fwt_entry->state)
+ && ((jiffies - fwt_entry->last_refresh) >=
+ priv->aging_delay)) {
+ pr_info("XVE: %s MAC ", priv->xve_name);
+ pr_info("%02x:%02x:%02x:%02x:%02x:%02x",
+ ALIGN_TO_FF(smac[0]),
+ ALIGN_TO_FF(smac[1]),
+ ALIGN_TO_FF(smac[2]),
+ ALIGN_TO_FF(smac[3]),
+ ALIGN_TO_FF(smac[4]),
+ ALIGN_TO_FF(smac[5]));
+ pr_info(" vlan %d Aged out\n",
+ fwt_entry->vlan);
+ /*
+ * Can there be a race here where path
+ * becomes a bad address when paths
+ * gets flushed??
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ xve_remove_fwt_entry(priv, fwt_entry);
+ path = fwt_entry->path;
+ if (path) {
+ memcpy(dgid.raw,
+ path->pathrec.dgid.raw,
+ sizeof(dgid));
+ if (list_empty(&path->fwt_list))
+ is_list_empty = 1;
+ }
+ spin_unlock_irqrestore(&priv->lock,
+ flags);
+ if (xve_age_path && is_list_empty)
+ xve_flush_single_path_by_gid
+ (priv->netdev, &dgid);
+ xve_fwt_put_ctx(xve_fwt, fwt_entry);
+ xve_fwt_entry_free(priv, fwt_entry);
+ priv->counters[XVE_MAC_AGED_COUNTER]++;
+ } else {
+ priv->counters[XVE_MAC_STILL_INUSE]++;
+ xve_fwt_put_ctx(xve_fwt, fwt_entry);
+ }
+ } else {
+ priv->counters[XVE_MAC_AGED_NOMATCHES]++;
+ }
+ }
+ }
+
+ return 0;
+}
+
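+/*
+ * Look up the forwarding entry for a (MAC, VLAN) pair. On a hit the
+ * entry's reference count is taken, the refresh bit is set if requested
+ * and the refresh timestamp is updated; the caller must drop the
+ * reference with xve_fwt_put_ctx().
+ */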
+struct xve_fwt_entry *xve_fwt_lookup(struct xve_fwt_s *xve_fwt, char *mac,
+ u16 vlan, int refresh)
+{
+ unsigned long flags;
+ struct hlist_head *head;
+ struct xve_fwt_entry *fwt_entry;
+
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ head = &xve_fwt->fwt[xve_mac_hash(mac, XVE_FWT_HASH_LISTS, vlan)];
+ fwt_entry = xve_fwt_find_entry(head, mac, vlan);
+ if (fwt_entry) {
+ atomic_inc(&fwt_entry->ref_cnt);
+ if (refresh)
+ set_bit(XVE_FWT_ENTRY_REFRESH, &fwt_entry->state);
+ fwt_entry->last_refresh = jiffies;
+ } else {
+ xve_debug(DEBUG_TABLE_INFO, NULL,
+ "%s No match for %02x%02x%02x%02x%02x%02x vlan %d\n",
+ __func__, mac[0], mac[1], mac[2], mac[3], mac[4],
+ mac[5], vlan);
+ }
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+ return fwt_entry;
+}
+
+void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt, struct xve_fwt_entry *fwt_entry)
+{
+ if (fwt_entry)
+ atomic_dec(&fwt_entry->ref_cnt);
+}
+
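+/*
+ * Learn or refresh a forwarding entry for a source MAC/VLAN. If the MAC
+ * has moved to a different GID the existing entry is re-linked to the
+ * new path; otherwise a new entry is allocated and added to the hash
+ * table.
+ */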
+void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
+ union ib_gid *gid, u32 qpn, char *smac, u16 vlan)
+{
+ struct hlist_head *head;
+ struct xve_fwt_entry *fwt_entry;
+ unsigned long flags, flags1;
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ struct xve_path *path;
+ char from[64], to[64];
+
+ fwt_entry = xve_fwt_lookup(xve_fwt, smac, vlan, 1);
+ if (fwt_entry) {
+ if (unlikely
+ (memcmp
+ (fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)))) {
+ print_mgid_buf(from, (char *)fwt_entry->dgid.raw);
+ print_mgid_buf(to, (char *)gid->raw);
+ pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x ",
+ priv->xve_name, ALIGN_TO_FF(smac[0]),
+ ALIGN_TO_FF(smac[1]), ALIGN_TO_FF(smac[2]),
+ ALIGN_TO_FF(smac[3]), ALIGN_TO_FF(smac[4]),
+ ALIGN_TO_FF(smac[5]));
+ pr_info(" vlan %d moved from GID %s to GID %s\n",
+ fwt_entry->vlan, from, to);
+
+ priv->counters[XVE_MAC_MOVED_COUNTER]++;
+
+ memcpy(fwt_entry->dgid.raw, gid->raw,
+ sizeof(union ib_gid));
+ /*
+ * We need to hold priv->lock
+ */
+ spin_lock_irqsave(&priv->lock, flags);
+ spin_lock_irqsave(&xve_fwt->lock, flags1);
+ if (fwt_entry->path)
+ list_del(&fwt_entry->list);
+ fwt_entry->path = NULL;
+ path = __path_find(priv->netdev, gid->raw);
+ if (path) {
+ fwt_entry->path = path;
+ list_add_tail(&fwt_entry->list,
+ &path->fwt_list);
+ }
+ spin_unlock_irqrestore(&xve_fwt->lock, flags1);
+ spin_unlock_irqrestore(&priv->lock, flags);
+ }
+ if (qpn && unlikely(fwt_entry->dqpn != qpn))
+ fwt_entry->dqpn = qpn;
+ /* Insert CM rx in the path */
+ if (fwt_entry->path && ctx)
+ fwt_entry->path->cm_ctx_rx = ctx;
+ xve_fwt_put_ctx(xve_fwt, fwt_entry);
+ } else {
+ fwt_entry =
+ kmem_cache_alloc(xve_fwt_cache, GFP_ATOMIC | __GFP_ZERO);
+ if (!fwt_entry) {
+ pr_warn("xve_fwt_entry_alloc() failed\n");
+ return;
+ }
+ memset(fwt_entry, 0, sizeof(struct xve_fwt_entry));
+ print_mgid_buf(from, (char *)gid->raw);
+ pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x",
+ priv->xve_name, ALIGN_TO_FF(smac[0]),
+ ALIGN_TO_FF(smac[1]),
+ ALIGN_TO_FF(smac[2]), ALIGN_TO_FF(smac[3]),
+ ALIGN_TO_FF(smac[4]), ALIGN_TO_FF(smac[5]));
+ pr_info("vlan %d learned from GID %s, mode: %s Fwt %p\n",
+ vlan, from, qpn ? "UD" : "RC", fwt_entry);
+ priv->counters[XVE_MAC_LEARN_COUNTER]++;
+ memcpy(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid));
+ fwt_entry->dqpn = qpn;
+ ether_addr_copy(fwt_entry->smac_addr, smac);
+ fwt_entry->vlan = vlan;
+ set_bit(XVE_FWT_ENTRY_REFRESH, &fwt_entry->state);
+ fwt_entry->last_refresh = jiffies;
+ set_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state);
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ fwt_entry->hash_value =
+ xve_mac_hash(smac, XVE_FWT_HASH_LISTS, vlan);
+ head =
+ &xve_fwt->fwt[xve_mac_hash(smac, XVE_FWT_HASH_LISTS, vlan)];
+ hlist_add_head(&fwt_entry->hlist, head);
+ xve_fwt->num++;
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+ }
+}
+
+void xve_remove_fwt_entry(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry)
+{
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ xve_debug(DEBUG_TABLE_INFO, priv, "%s Deleting FWT From list %p\n",
+ __func__, fwt_entry);
+ if (fwt_entry->path)
+ list_del(&fwt_entry->list);
+ hlist_del(&fwt_entry->hlist);
+ xve_fwt->num--;
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+}
+
+void xve_fwt_entry_free(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry)
+{
+ unsigned long begin;
+ /*
+ * Wait for the reference count to drop to zero (a kref would be
+ * a better fit here).
+ */
+ begin = jiffies;
+
+ while (atomic_read(&fwt_entry->ref_cnt)) {
+ xve_debug(DEBUG_TABLE_INFO, priv,
+ "%s Waiting for ref cnt to become zero %p\n",
+ __func__, fwt_entry);
+ if (time_after(jiffies, begin + 5 * HZ)) {
+ xve_warn(priv,
+ "timing out fwt_entry still in use %p\n",
+ fwt_entry);
+ break;
+ }
+ msleep(20);
+ }
+ kmem_cache_free(xve_fwt_cache, fwt_entry);
+}
+
+void xve_fwt_entry_destroy(struct xve_dev_priv *priv,
+ struct xve_fwt_entry *fwt_entry)
+{
+ xve_remove_fwt_entry(priv, fwt_entry);
+ xve_fwt_entry_free(priv, fwt_entry);
+}
+
+void xve_fwt_cleanup(struct xve_dev_priv *priv)
+{
+ struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+ int i;
+ struct hlist_head *head;
+ struct hlist_node *n;
+ struct xve_fwt_entry *fwt_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&xve_fwt->lock, flags);
+ for (i = 0; i < XVE_FWT_HASH_LISTS; i++) {
+ head = &xve_fwt->fwt[i];
+ hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+ hlist_del(&fwt_entry->hlist);
+ kmem_cache_free(xve_fwt_cache, fwt_entry);
+ xve_fwt->num--;
+ }
+ }
+ pr_info("XVE: %s Forwarding table cleaned up for %s",
+ __func__, priv->xve_name);
+ pr_info("number of entries %d\n", xve_fwt->num);
+ spin_unlock_irqrestore(&xve_fwt->lock, flags);
+}
+
+void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+ skb->protocol = eth_type_trans(skb, priv->netdev);
+ skb->dev = priv->netdev;
+ skb_pkt_type(skb, PACKET_HOST);
+ if (test_bit(XVE_FLAG_CSUM, &priv->flags))
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->truesize = skb->len + sizeof(struct sk_buff);
+}
+
+void xve_tables_exit(void)
+{
+ kmem_cache_destroy(xve_fwt_cache);
+}
--- /dev/null
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
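+/*
+ * Attach the UD QP to a multicast group, optionally (re)programming the
+ * QKey first. Also re-validates the P_Key index for the port.
+ */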
+int xve_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid,
+ int set_qkey)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_attr *qp_attr = NULL;
+ int ret;
+ u16 pkey_index;
+
+ if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+ clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+ ret = -ENXIO;
+ goto out;
+ }
+ set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+
+ if (set_qkey) {
+ ret = -ENOMEM;
+ qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
+ if (!qp_attr)
+ goto out;
+
+ /* set correct QKey for QP */
+ qp_attr->qkey = priv->qkey;
+ ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP, ret = %d\n", ret);
+ goto out;
+ }
+ }
+
+ /* attach QP to multicast group */
+ ret = ib_attach_mcast(priv->qp, mgid, mlid);
+ if (ret)
+ xve_warn(priv,
+ "failed to attach to multicast group, ret = %d\n",
+ ret);
+
+out:
+ kfree(qp_attr);
+ return ret;
+}
+
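+/*
+ * Bring the UD QP through the INIT -> RTR -> RTS transitions; on any
+ * failure the QP is moved back to RESET.
+ */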
+int xve_init_qp(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret;
+ struct ib_qp_attr qp_attr;
+ int attr_mask;
+
+ if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+ return -1;
+
+ qp_attr.qp_state = IB_QPS_INIT;
+ qp_attr.qkey = 0;
+ qp_attr.port_num = priv->port;
+ qp_attr.pkey_index = priv->pkey_index;
+ attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE;
+ ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
+ goto out_fail;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTR;
+ /* Can't set this in a INIT->RTR transition */
+ attr_mask &= ~IB_QP_PORT;
+ ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
+ goto out_fail;
+ }
+
+ qp_attr.qp_state = IB_QPS_RTS;
+ qp_attr.sq_psn = 0;
+ attr_mask |= IB_QP_SQ_PSN;
+ attr_mask &= ~IB_QP_PKEY_INDEX;
+ ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+ if (ret) {
+ xve_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
+ goto out_fail;
+ }
+
+ return 0;
+
+out_fail:
+ qp_attr.qp_state = IB_QPS_RESET;
+ if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ xve_warn(priv, "Failed to modify QP to RESET state\n");
+
+ return ret;
+}
+
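+/*
+ * Allocate the IB resources for one interface: PD, DMA MR, send/receive
+ * CQs, the CM resources and the UD QP, and pre-initialize the scatter
+ * lists used for posting sends and receives.
+ */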
+int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ struct ib_qp_init_attr init_attr = {
+ .cap = {
+ .max_send_wr = xve_sendq_size,
+ .max_recv_wr = xve_recvq_size,
+ .max_send_sge = 1,
+ .max_recv_sge = XVE_UD_RX_SG},
+ .sq_sig_type = IB_SIGNAL_ALL_WR,
+ .qp_type = IB_QPT_UD
+ };
+
+ int ret, size;
+ int i;
+ struct ethtool_coalesce *coal;
+
+ priv->pd = ib_alloc_pd(priv->ca);
+ if (IS_ERR(priv->pd)) {
+ pr_warn("%s: failed to allocate PD for %s\n",
+ ca->name, priv->xve_name);
+ return -ENODEV;
+ }
+
+ priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
+ if (IS_ERR(priv->mr)) {
+ pr_warn("%s: ib_get_dma_mr failed\n", ca->name);
+ goto out_free_pd;
+ }
+
+ size = xve_recvq_size + 1;
+ ret = xve_cm_dev_init(dev);
+ if (ret != 0) {
+ pr_err("%s Failed for %s [ret %d ]\n", __func__,
+ priv->xve_name, ret);
+ goto out_free_mr;
+ }
+ size += xve_sendq_size;
+ size += xve_recvq_size + 1; /* 1 extra for rx_drain_qp */
+
+ priv->recv_cq =
+ ib_create_cq(priv->ca, xve_ib_completion, NULL, dev, size, 0);
+ if (IS_ERR(priv->recv_cq)) {
+ pr_warn("%s: failed to create receive CQ for %s\n",
+ ca->name, priv->xve_name);
+ goto out_free_mr;
+ }
+
+ priv->send_cq = ib_create_cq(priv->ca, xve_send_comp_handler, NULL,
+ dev, xve_sendq_size, 0);
+ if (IS_ERR(priv->send_cq)) {
+ pr_warn("%s: failed to create send CQ for %s\n",
+ ca->name, priv->xve_name);
+ goto out_free_recv_cq;
+ }
+
+ if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+ goto out_free_send_cq;
+
+ coal = kzalloc(sizeof(*coal), GFP_KERNEL);
+ if (coal) {
+ coal->rx_coalesce_usecs = 10;
+ coal->tx_coalesce_usecs = 10;
+ coal->rx_max_coalesced_frames = 16;
+ coal->tx_max_coalesced_frames = 16;
+ dev->ethtool_ops->set_coalesce(dev, coal);
+ kfree(coal);
+ }
+
+ init_attr.send_cq = priv->send_cq;
+ init_attr.recv_cq = priv->recv_cq;
+
+ if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
+ init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+ if (dev->features & NETIF_F_SG)
+ init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
+ priv->qp = ib_create_qp(priv->pd, &init_attr);
+ if (IS_ERR(priv->qp)) {
+ pr_warn("%s: failed to create QP\n", ca->name);
+ goto out_free_send_cq;
+ }
+
+ for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+ priv->tx_sge[i].lkey = priv->mr->lkey;
+
+ priv->tx_wr.opcode = IB_WR_SEND;
+ priv->tx_wr.sg_list = priv->tx_sge;
+ priv->tx_wr.send_flags = IB_SEND_SIGNALED;
+
+ priv->rx_sge[0].lkey = priv->mr->lkey;
+ if (xve_ud_need_sg(priv->max_ib_mtu)) {
+ priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
+ priv->rx_sge[1].length = PAGE_SIZE;
+ priv->rx_sge[1].lkey = priv->mr->lkey;
+ priv->rx_wr.num_sge = XVE_UD_RX_SG;
+ } else {
+ priv->rx_sge[0].length = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
+ priv->rx_wr.num_sge = 1;
+ }
+ priv->rx_wr.next = NULL;
+ priv->rx_wr.sg_list = priv->rx_sge;
+
+ return 0;
+
+out_free_send_cq:
+ ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+ ib_destroy_cq(priv->recv_cq);
+
+out_free_mr:
+ ib_dereg_mr(priv->mr);
+ xve_cm_dev_cleanup(dev);
+
+out_free_pd:
+ ib_dealloc_pd(priv->pd);
+ return -ENODEV;
+}
+
+void xve_transport_dev_cleanup(struct net_device *dev)
+{
+ struct xve_dev_priv *priv = netdev_priv(dev);
+ int ret = 0;
+
+ if (priv->qp) {
+ if (ib_destroy_qp(priv->qp))
+ xve_warn(priv, "ib_qp_destroy failed\n");
+ priv->qp = NULL;
+ clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+ }
+ ret = ib_destroy_cq(priv->send_cq);
+ if (ret)
+ xve_warn(priv, "%s ib_destroy_cq (sendq) failed ret=%d\n",
+ __func__, ret);
+
+ ret = ib_destroy_cq(priv->recv_cq);
+ if (ret)
+ xve_warn(priv, "%s ib_destroy_cq failed ret=%d\n",
+ __func__, ret);
+
+ xve_cm_dev_cleanup(dev);
+
+ ret = ib_dereg_mr(priv->mr);
+ if (ret)
+ xve_warn(priv, "%s ib_dereg_mr failed ret=%d\n", __func__, ret);
+
+ ret = ib_dealloc_pd(priv->pd);
+ if (ret)
+ xve_warn(priv, "%s ib_dealloc_pd failed ret=%d\n",
+ __func__, ret);
+}
+
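+/*
+ * Asynchronous IB event handler: SM change and client re-register
+ * trigger a light flush, port/LID changes a normal flush and a P_Key
+ * change a heavy flush.
+ */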
+void xve_event(struct ib_event_handler *handler, struct ib_event *record)
+{
+ struct xve_dev_priv *priv =
+ container_of(handler, struct xve_dev_priv, event_handler);
+
+ if (record->element.port_num != priv->port)
+ return;
+
+ xve_debug(DEBUG_MCAST_INFO, priv, "Event %d on device %s port %d\n",
+ record->event, record->device->name,
+ record->element.port_num);
+
+ switch (record->event) {
+ case IB_EVENT_SM_CHANGE:
+ priv->counters[XVE_SM_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+ break;
+ case IB_EVENT_CLIENT_REREGISTER:
+ priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
+ set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+ xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+ break;
+ case IB_EVENT_PORT_ERR:
+ priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
+ case IB_EVENT_PORT_ACTIVE:
+ priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
+ case IB_EVENT_LID_CHANGE:
+ priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+ break;
+ case IB_EVENT_PKEY_CHANGE:
+ priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
+ xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
+ break;
+ default:
+ priv->counters[XVE_INVALID_EVENT_COUNTER]++;
+ break;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XVE_XSMP_MSGS_H__
+#define __XVE_XSMP_MSGS_H__
+
+#define XVE_MAX_NAME_SIZE 16
+#define XVE_MAX_PROC_NAME_SIZE 32
+#define CHASSIS_MAX_NAME_SIZE 32
+#define SESSION_MAX_NAME_SIZE 32
+#define XVE_MAX_HOST_NAME 32
+#define XVE_MP_GROUP_NAME_MAX (XVE_MAX_NAME_SIZE + XVE_MAX_HOST_NAME)
+
+enum xve_xsmp_cmd_type {
+ XSMP_XVE_INVALID,
+ XSMP_XVE_INSTALL,
+ XSMP_XVE_DELETE,
+ XSMP_XVE_UPDATE,
+ XSMP_XVE_ADMIN_UP,
+ XSMP_XVE_ADMIN_DOWN,
+ XSMP_XVE_OPER_UP,
+ XSMP_XVE_OPER_DOWN,
+ XSMP_XVE_OPER_READY,
+ XSMP_XVE_VLANIP, /* VLAN and IP address */
+ XSMP_XVE_STATS, /* XVE driver statistics */
+ XSMP_XVE_SYNC_BEGIN,
+ XSMP_XVE_SYNC_END,
+ XSMP_XVE_INFO_REQUEST, /* request vnic info */
+ XSMP_XVE_OPER_FAILED,
+ XSMP_XVE_OPER_REQ,
+ XSMP_XVE_HA_INFO,
+ XSMP_XVE_ISCSI_INFO,
+
+ XSMP_XVE_TYPE_MAX,
+};
+
+/* XVE specific messages */
+
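+/*
+ * Install/update parameters for one XVE instance, padded to a fixed
+ * 512-byte message.
+ */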
+struct xve_xsmp_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+ u64 tca_guid;
+ u16 tca_lid;
+ u16 mac_high;
+ u32 mac_low;
+ u16 vn_admin_rate;
+ u16 admin_state;
+ u16 encap;
+ u16 vn_mtu;
+ u32 install_flag;
+ u8 xve_name[XVE_MAX_NAME_SIZE];
+ u16 service_level; /* SL value for this vnic */
+ u16 fc_active; /* 1: enable, 0:
+ * disable host rate control */
+ u16 cir; /* committed rate in mbps */
+ u16 pir; /* peak rate in mbps */
+ u32 cbs; /* committed burst size in bytes */
+ u32 pbs; /* peak burst size in bytes */
+ u8 vm_index; /* the index used by vmware
+ * for persistence */
+ u8 _reserved;
+ u16 mp_flag;
+ u8 mp_group[XVE_MP_GROUP_NAME_MAX];
+ u8 la_flag; /* linkAggregation flag */
+ u8 la_policy;
+ /* for virtual network */
+ u32 net_id;
+ u8 vnet_mode;
+ } __packed;
+ u8 bytes[512];
+ };
+} __packed;
+
+/* The reason code for NACKing an install */
+#define XVE_NACK_DUP_NAME 1 /* duplicate name */
+#define XVE_NACK_DUP_VID 2 /* duplicate VID */
+#define XVE_NACK_LIMIT_REACHED 3 /* Max number of XVEs reached */
+#define XVE_NACK_ALLOCATION_ERROR 4 /* Error during instantiation */
+#define XVE_NACK_CODE_MAX 5
+
+/* The common XVE XSMP header for all messages */
+struct xve_xsmp_header {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+};
+
+/* Maximum number of dwords in an IP address (v4 or v6) */
+#define MAX_IP_ADDR_DWORDS 4
+
+/* IP address type */
+enum xve_ipaddr_type {
+ XVE_ADDR_TYPE_IPV4 = 1,
+ XVE_ADDR_TYPE_IPV6,
+};
+
+/* Bitmask values for add/delete VLAN notifications */
+#define XVE_ADD_VLAN_NOTIFY (1 << 0)
+#define XVE_DELETE_VLAN_NOTIFY (1 << 1)
+
+/* Denotes an instance of a VLANID and IP address pair */
+struct xve_xsmp_vlanip_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+ u8 ip_type;
+ u8 _reserved1;
+ u16 _reserved2;
+ u32 vlanid;
+ u32 ipaddress[MAX_IP_ADDR_DWORDS];
+ u32 netmask[MAX_IP_ADDR_DWORDS];
+ /*
+ * This does not come from chassis but locally generated
+ */
+ char ifname[XVE_MAX_NAME_SIZE];
+ u16 mp_flag;
+ } __packed;
+ u8 bytes[512];
+ };
+};
+
+struct xve_xsmp_stats_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 bitmask;
+ u64 resource_id;
+ u32 counter[16];
+ /*XVE IO STATS */
+ u64 stats_handle;
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_errors;
+ u64 rx_drops;
+ u64 rx_overruns;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_errors;
+ u64 tx_drops;
+ } __packed;
+ u8 bytes[512];
+ };
+};
+
+struct xve_ha_info_msg {
+ union {
+ struct {
+ u8 type;
+ u8 code;
+ u16 length;
+ u32 reserved;
+ u64 resource_id; /* vid */
+ u8 ha_state;
+ u8 name[XVE_MAX_NAME_SIZE];
+ } __packed;
+ u8 bytes[512];
+ };
+} __packed;
+
+#define ISCSI_MOUNT_DEV_NAME_LEN 100
+#define MAX_DOMAIN_NAME_LEN 64
+
+#define SAN_MOUNT_TYPE_STATIC 1
+#define SAN_MOUNT_TYPE_LVM 2
+#define SAN_MOUNT_TYPE_DIRECT 3
+
+struct xve_iscsi_info {
+ uint64_t vid;
+ uint8_t role;
+ uint16_t vlan_id;
+ uint8_t ip_type;
+ uint32_t ip_addr;
+ uint32_t netmask;
+ uint64_t mac;
+ char xve_name[XVE_MAX_NAME_SIZE];
+ uint32_t gateway_ip_address;
+ uint32_t dns_ip_address;
+ char domain_name[MAX_DOMAIN_NAME_LEN];
+ uint16_t protocol;
+ uint16_t port;
+ uint16_t lun;
+ uint32_t target_ip_address;
+ char target_iqn[ISCSI_MOUNT_DEV_NAME_LEN]; /* Target Name */
+ char target_portal_group[ISCSI_MOUNT_DEV_NAME_LEN];
+ char initiator_iqn[ISCSI_MOUNT_DEV_NAME_LEN];
+
+ uint16_t mount_type;
+ char mount_dev[ISCSI_MOUNT_DEV_NAME_LEN];
+ char mount_options[ISCSI_MOUNT_DEV_NAME_LEN];
+ char vol_group[ISCSI_MOUNT_DEV_NAME_LEN];
+ char vol_group_name[ISCSI_MOUNT_DEV_NAME_LEN];
+} __packed;
+
+struct xve_iscsi_msg {
+ union {
+ struct {
+ uint8_t type;
+ uint8_t code;
+ uint16_t length;
+ struct xve_iscsi_info iscsi_info;
+ } __packed;
+ uint8_t bytes[960];
+ };
+} __packed;
+
+/* Values for the bitmask of the install/delete/update message*/
+#define XVE_UPDATE_MAC (1 << 0)
+#define XVE_UPDATE_BANDWIDTH (1 << 1)
+#define XVE_UPDATE_MTU (1 << 2)
+#define XVE_UPDATE_TCA_INFO (1 << 3)
+#define XVE_UPDATE_SL (1 << 4)
+#define XVE_UPDATE_ENCAP (1 << 5)
+#define XVE_UPDATE_ADMIN_STATE (1 << 6)
+#define XVE_UPDATE_QOS (1 << 7)
+#define XVE_UPDATE_ACL (1 << 8)
+#define XVE_UPDATE_MP_FLAG (1 << 10)
+#define XVE_XT_STATE_DOWN (1 << 30)
+#define XVE_UPDATE_XT_CHANGE (1 << 31)
+
+/* mp_flag */
+#define MP_XVE_PRIMARY (1 << 0)
+#define MP_XVE_SECONDARY (1 << 1)
+#define MP_XVE_AUTO_SWITCH (1 << 2)
+
+/* ha_state */
+#define XVE_HA_STATE_UNKNOWN 0
+#define XVE_HA_STATE_ACTIVE 1
+#define XVE_HA_STATE_STANDBY 2
+
+/* Ack and Nack sent out in the 'code' field */
+#define XSMP_XVE_ACK (1 << 6)
+#define XSMP_XVE_NACK (1 << 7)
+
+/* Bits for the promiscuous flag field */
+#define XVE_MCAST (1 << 0)
+
+/* Defines for the install flag */
+#define XVE_INSTALL_TCP_OFFL (1 << 0)
+#define XVE_INSTALL_UDP_OFFL (1 << 1)
+#define XVE_INSTALL_TSO (1 << 3)
+#define XVE_INSTALL_RX_BAT (1 << 4)
+#define XVE_8K_IBMTU (1 << 5)
+#define XVE_INSTALL_LINK2QP (1 << 8)
+
+#define XSIGO_IP_FRAGMENT_BIT (1 << 8)
+#define XSIGO_IPV4_BIT (1 << 6)
+#define XSIGO_TCP_CHKSUM_GOOD_BIT (1 << 3)
+#define XSIGO_UDP_CHKSUM_GOOD_BIT (1 << 1)
+
+#endif /* __XVE_XSMP_MSGS_H__ */