Add Oracle Virtual Networking drivers for uek4 kernel
author Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
Wed, 23 Sep 2015 01:56:41 +0000 (18:56 -0700)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 7 Oct 2015 11:39:52 +0000 (04:39 -0700)
This commit adds four kernel modules: xscore, xsvnic, xve and xsvhba,
developed by Xsigo (acquired by Oracle) and used in the Oracle Virtual
Networking (OVN) products, which provide virtual network and storage
adapter devices on servers dynamically at runtime.

The heart of the OVN product is the Fabric Interconnect (FI).
Hosts and IO modules connect to the FI over an InfiniBand fabric.
An IO module can be a network (N/W) card, an FC card, or both.

The "xscore" module performs FI topology discovery and establishes
the connection with the FI. It retrieves virtual device management
commands such as INSTALL and DELETE, and provides wrappers for the
IB framework APIs used by its client modules "xsvnic", "xsvhba"
and "xve", as sketched below.
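
For illustration only (not part of this change), a client module would
register with "xscore" roughly as follows, using the
struct xsmp_service_reg_info and xcpm_register_service() interfaces
added in xscore.h below; the handler names and the message type value
are hypothetical:

    static int my_svc_id;

    static void my_recv(void *xsmp_hndl, u8 *data, int length)
    {
            /* parse INSTALL/DELETE/... control messages from the FI */
    }

    static void my_event(void *xsmp_hndl, int event)
    {
            /* react to session up/down notifications */
    }

    static struct xsmp_service_reg_info my_svc = {
            .receive_handler   = my_recv,
            .event_handler     = my_event,
            .ctrl_message_type = 0x2,       /* hypothetical XSMP type */
    };

    static int __init my_init(void)
    {
            /* the return value is treated here as the service id,
             * matching xcpm_unregister_service(int service_id); the
             * exact error convention is an assumption */
            my_svc_id = xcpm_register_service(&my_svc);
            return 0;
    }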

The "xve" module supports the Xsigo Virtual Ethernet (XVE) protocol.
The "xsvnic" module supports the Xsigo vNIC functionality. These modules
interface between the kernel networking stack and the "xscore" module.
On the egress side, they process the N/W packet and hand it to the
"xscore" module, which wraps it into an IB packet.

On the ingress side, "xscore" receives the N/W packet encapsulated
inside an IB packet and transfers it to "xsvnic" or "xve". These
modules process the packet and send it to the kernel networking
stack. "xsvnic" interacts with a N/W card gateway connected to the
FI, whereas "xve" interacts with another host in the same IB fabric,
as sketched below.
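
A minimal sketch of this data path, assuming the xscore connection API
introduced later in this patch (xscore_post_send() and the
recv_msg_handler callback of struct xscore_conn_ctx); the client-side
helper names are hypothetical:

    /* ingress: xscore unwraps the IB packet and invokes the client's
     * recv_msg_handler with the inner network packet */
    static void my_recv_msg(void *client_arg, void *buf, int sz,
                            int status, int n)
    {
            /* build an skb around buf and push it up the stack,
             * e.g. via netif_receive_skb() */
    }

    /* egress: the processed network packet is handed to xscore,
     * which wraps it into an IB packet and posts it */
    static int my_xmit(struct xscore_conn_ctx *ctx, struct sk_buff *skb)
    {
            return xscore_post_send(ctx, skb->data, skb->len, 0);
    }

    /* wired up at connection init: ctx->recv_msg_handler = my_recv_msg; */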

The "xsvhba" module supports the Xsigo virtual HBA, allowing SAN
connectivity. It interfaces with the SCSI layer and communicates
with the FC card gateway connected to the FI. It is responsible for
accepting and transporting SCSI commands from/to the specified SCSI
target. The "xsvhba" module uses "xscore" to wrap (unwrap) the
commands in an IB packet and transmit (receive) them.

Signed-off-by: Pradeep Gopanapalli <pradeep.gopanapalli@oracle.com>
Signed-off-by: Mukesh Kacker <mukesh.kacker@oracle.com>
64 files changed:
drivers/infiniband/Kconfig
drivers/infiniband/ulp/Makefile
drivers/infiniband/ulp/xsigo/Kconfig [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/Kconfig [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xg_heap.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xg_heap.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xs_compat.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xs_ud.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xs_versions.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_api.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_impl.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_priv.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_stats.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_uadm.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_vpci.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xscore_xds.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xsmp.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xsmp.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xsmp_common.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xscore/xsmp_session.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/Kconfig [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_attr.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_create.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_defs.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_delete.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_init.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_main.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_os_def.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_proc.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_stats.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_wq.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/Kconfig [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/xsvnic.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_main.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_stats.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_xsmp_msgs.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/Kconfig [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/Makefile [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/hash.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_cm.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_compat.h [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_ethtool.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_fs.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_ib.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_main.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_multicast.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_stats.c [new file with mode: 0755]
drivers/infiniband/ulp/xsigo/xve/xve_tables.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_verbs.c [new file with mode: 0644]
drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h [new file with mode: 0644]

index b899531498eb0dc7924e4587f67dbc3f7c116313..07803c48c96981a8f493dd60d741b1156a3f9821 100644 (file)
@@ -69,6 +69,8 @@ source "drivers/infiniband/hw/usnic/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
+source "drivers/infiniband/ulp/xsigo/Kconfig"
+
 source "drivers/infiniband/ulp/srp/Kconfig"
 source "drivers/infiniband/ulp/srpt/Kconfig"
 
index f3c7dcf03098a51c9a86ffecb21a815148795649..f0c2925711d4e6f3289acd72d6195e10bfcc517c 100644 (file)
@@ -1,4 +1,5 @@
 obj-$(CONFIG_INFINIBAND_IPOIB)         += ipoib/
+obj-$(CONFIG_INFINIBAND_XSCORE)                += xsigo/
 obj-$(CONFIG_INFINIBAND_SRP)           += srp/
 obj-$(CONFIG_INFINIBAND_SRPT)          += srpt/
 obj-$(CONFIG_INFINIBAND_ISER)          += iser/
diff --git a/drivers/infiniband/ulp/xsigo/Kconfig b/drivers/infiniband/ulp/xsigo/Kconfig
new file mode 100644 (file)
index 0000000..5c85c56
--- /dev/null
@@ -0,0 +1,4 @@
+source "drivers/infiniband/ulp/xsigo/xscore/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xsvnic/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xsvhba/Kconfig"
+source "drivers/infiniband/ulp/xsigo/xve/Kconfig"
diff --git a/drivers/infiniband/ulp/xsigo/Makefile b/drivers/infiniband/ulp/xsigo/Makefile
new file mode 100644 (file)
index 0000000..053e4ef
--- /dev/null
@@ -0,0 +1,4 @@
+obj-$(CONFIG_INFINIBAND_XSCORE)                += xscore/
+obj-$(CONFIG_INFINIBAND_XSVNIC)                += xsvnic/
+obj-$(CONFIG_INFINIBAND_XSVHBA)                += xsvhba/
+obj-$(CONFIG_INFINIBAND_XVE)           += xve/
diff --git a/drivers/infiniband/ulp/xsigo/xscore/Kconfig b/drivers/infiniband/ulp/xsigo/xscore/Kconfig
new file mode 100644 (file)
index 0000000..b5737a2
--- /dev/null
@@ -0,0 +1,6 @@
+config INFINIBAND_XSCORE
+       tristate "Xsigo fabric support for InfiniBand devices"
+       depends on NETDEVICES && INET && INFINIBAND
+       ---help---
+         Support for the Xsigo fabric for InfiniBand devices. This
+         enables InfiniBand data transfer through the Xsigo director.
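
For reference, building the whole stack as modules corresponds to the
following .config fragment (symbol names as added by this patch):

    CONFIG_INFINIBAND_XSCORE=m
    CONFIG_INFINIBAND_XSVNIC=m
    CONFIG_INFINIBAND_XSVHBA=m
    CONFIG_INFINIBAND_XVE=m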
diff --git a/drivers/infiniband/ulp/xsigo/xscore/Makefile b/drivers/infiniband/ulp/xsigo/xscore/Makefile
new file mode 100644 (file)
index 0000000..ceb69fe
--- /dev/null
@@ -0,0 +1,9 @@
+obj-$(CONFIG_INFINIBAND_XSCORE) := xscore.o
+xscore-y := xscore_impl.o xs_ud.o xscore_api.o xsmp.o \
+           xscore_stats.o xscore_uadm.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/include
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xg_heap.c b/drivers/infiniband/ulp/xsigo/xscore/xg_heap.c
new file mode 100644 (file)
index 0000000..f824451
--- /dev/null
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/radix-tree.h>
+#include <linux/notifier.h>
+#include <linux/string.h>
+#include <linux/bitops.h>
+
+#include "xg_heap.h"
+
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT    6
+#else
+#define RADIX_TREE_MAP_SHIFT    3      /* For more stressful testing */
+#endif
+#define RADIX_TREE_TAGS         2
+
+#define RADIX_TREE_MAP_SIZE     (1UL << RADIX_TREE_MAP_SHIFT)
+#define RADIX_TREE_MAP_MASK     (RADIX_TREE_MAP_SIZE-1)
+
+#define RADIX_TREE_TAG_LONGS    \
+       ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct radix_tree_node {
+       unsigned int count;
+       void *slots[RADIX_TREE_MAP_SIZE];
+       unsigned long tags[RADIX_TREE_TAGS][RADIX_TREE_TAG_LONGS];
+};
+
+struct radix_tree_path {
+       struct radix_tree_node *node;
+       int offset;
+};
+
+#define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
+#define RADIX_TREE_MAX_PATH (RADIX_TREE_INDEX_BITS/RADIX_TREE_MAP_SHIFT + 2)
+
+static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH] __read_mostly;
+
+/*
+ * Radix tree node cache.
+ */
+static kmem_cache_t *radix_tree_node_cachep;
+
+/*
+ * Per-cpu pool of preloaded nodes
+*/
+struct radix_tree_preload {
+       int nr;
+       struct radix_tree_node *nodes[RADIX_TREE_MAX_PATH];
+};
+
+vmk_moduleid moduleid;
+vmk_heapid heapid;
+
+void memory_thread_init(void)
+{
+       moduleid = vmk_modulestacktop();
+       pr_info("module id = %d\n", moduleid);
+       heapid = vmk_modulegetheapid(moduleid);
+}
+
+void *ib_alloc_pages(unsigned int flags, unsigned int order)
+{
+       void *vaddr;
+       unsigned long size = (VMK_PAGE_SIZE << order);
+
+       vaddr = vmk_heapalign(heapid, size, PAGE_SIZE);
+       if (vaddr == NULL)
+               return 0;
+
+       return vaddr;
+}
+EXPORT_SYMBOL(ib_alloc_pages);
+
+void ib_free_pages(void *ptr, int order)
+{
+       vmk_heapfree(heapid, ptr);
+}
+EXPORT_SYMBOL(ib_free_pages);
+
+void *ib_kmalloc(size_t size, gfp_t flags)
+{
+       return vmk_heapalloc(heapid, size);
+}
+EXPORT_SYMBOL(ib_kmalloc);
+
+void ib_free(void *ptr)
+{
+       vmk_heapfree(heapid, ptr);
+}
+EXPORT_SYMBOL(ib_free);
+
+static int __init ib_kompat_init(void)
+{
+       radix_tree_init();
+       memory_thread_init();
+       return 0;
+}
+
+static void __exit ib_kompat_cleanup(void)
+{
+       radix_tree_destroy();
+}
+
+int xg_vmk_kompat_init(void)
+{
+       return ib_kompat_init();
+}
+
+void xg_vmk_kompat_cleanup(void)
+{
+       return ib_kompat_cleanup();
+}
+
+/*
+ * We added __iowrite64_copy because it is a missing API
+ */
+void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
+{
+       u64 __iomem *dst = to;
+       const u64 *src = from;
+       const u64 *end = src + count;
+
+       while (src < end)
+               __raw_writeq(*src++, dst++);
+}
+EXPORT_SYMBOL(__iowrite64_copy);
+
+/*
+ * memmove() implementation taken from vmklinux26/linux/lib/string.c
+ */
+void *memmove(void *dest, const void *src, size_t count)
+{
+       char *tmp;
+       const char *s;
+
+       if (dest <= src) {
+               tmp = dest;
+               s = src;
+               while (count--)
+                       *tmp++ = *s++;
+       } else {
+               tmp = dest;
+               tmp += count;
+               s = src;
+               s += count;
+               while (count--)
+                       *--tmp = *--s;
+       }
+       return dest;
+}
+EXPORT_SYMBOL(memmove);
+
+/* functions from radix-tree.c */
+static void
+radix_tree_node_ctor(void *node, kmem_cache_t *cachep, unsigned long flags)
+{
+       memset(node, 0, sizeof(struct radix_tree_node));
+}
+
+static __init unsigned long __maxindex(unsigned int height)
+{
+       unsigned int tmp = height * RADIX_TREE_MAP_SHIFT;
+       unsigned long index = (~0UL >> (RADIX_TREE_INDEX_BITS - tmp - 1)) >> 1;
+
+       if (tmp >= RADIX_TREE_INDEX_BITS)
+               index = ~0UL;
+       return index;
+}
+
+static __init void radix_tree_init_maxindex(void)
+{
+       unsigned int i;
+
+       for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
+               height_to_maxindex[i] = __maxindex(i);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static int radix_tree_callback(struct notifier_block *nfb,
+                              unsigned long action, void *hcpu)
+{
+       int cpu = (long)hcpu;
+       struct radix_tree_preload *rtp;
+       return NOTIFY_OK;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+void __init radix_tree_init(void)
+{
+       radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
+                                                  sizeof(struct
+                                                         radix_tree_node), 0,
+                                                  SLAB_PANIC,
+                                                  radix_tree_node_ctor, NULL);
+       radix_tree_init_maxindex();
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xg_heap.h b/drivers/infiniband/ulp/xsigo/xscore/xg_heap.h
new file mode 100644 (file)
index 0000000..ed60bb2
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * This software is available to you under the OpenIB.org BSD license,
+ * available in the LICENSE.TXT file accompanying this software.
+ * These details are also available at <http://openib.org/license.html>.
+ *
+ */
+
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/workqueue.h>
+#include <linux/log2.h>
+#include <linux/byteorder/swab.h>
+#include <linux/mutex.h>
+#ifndef EXPORT_SYMTAB
+#define EXPORT_SYMTAB
+#endif
+
+#define for_each_netdev(a, dev)        for ((dev) = dev_base;\
+                               (dev) != NULL;\
+                               (dev) = (dev)->next)
+
+void *memmove(void *dest, const void *src, size_t count);
+
+#ifndef bool
+#define bool int
+#define true 1
+#define false 0
+#endif
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xs_compat.h b/drivers/infiniband/ulp/xsigo/xscore/xs_compat.h
new file mode 100644 (file)
index 0000000..0171468
--- /dev/null
@@ -0,0 +1,325 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef        _XS_COMPAT_H
+#define        _XS_COMPAT_H
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/kobject.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/io.h>
+#include <linux/in.h>
+#include <linux/log2.h>
+#include <linux/mutex.h>
+#include <linux/version.h>
+#include <linux/idr.h>
+#include <linux/netdevice.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_verbs.h>
+
+/*
+ * Workqueue changes backported for kernels older than 2.6.20.
+ * ESX 4.0 already has these changes, so they are not needed there.
+ */
+
+typedef void *xsmp_cookie_t;
+
+#if defined(XSIGOPSEUDOFLAG)
+/*
+ * ESX-5.1 pseudo device registration.
+ */
+static inline void xg_preregister_pseudo_device(struct net_device *netdev)
+{
+       if (netdev->pdev) {
+               netdev->pdev->netdev = NULL;
+               netdev->pdev_pseudo = netdev->pdev;
+               netdev->pdev = NULL;
+       }
+}
+
+static inline void xg_setup_pseudo_device(struct net_device *netdev,
+                                         struct ib_device *hca)
+{
+       netdev->features |= NETIF_F_PSEUDO_REG;
+       SET_NETDEV_DEV(netdev, hca->dma_device);
+}
+#else
+static inline void xg_preregister_pseudo_device(struct net_device *netdev)
+{
+}
+
+static inline void xg_setup_pseudo_device(struct net_device *netdev,
+                                         struct ib_device *hca)
+{
+}
+#endif
+
+static inline void xg_set_netdev_dev(struct net_device *netdev,
+                                    struct ib_device *hca)
+{
+}
+
+#ifndef BACKPORT_LINUX_WORKQUEUE_TO_2_6_19
+
+#endif
+
+#if !defined(XG_FRAG_SIZE_PRESENT)
+
+static inline unsigned int skb_frag_size(const skb_frag_t *frag)
+{
+       return frag->size;
+}
+
+#endif
+
+#if !defined(XG_FRAG_PAGE_PRESENT)
+
+static inline struct page *skb_frag_page(const skb_frag_t *frag)
+{
+       return frag->page;
+}
+
+#endif
+
+#include <scsi/scsi_cmnd.h>
+
+#if defined(SCSI_STRUCT_CHANGES)
+
+static inline void scsi_set_buffer(struct scsi_cmnd *cmd, void *buffer)
+{
+       cmd->sdb.table.sgl = buffer;
+}
+
+static inline void set_scsi_sg_count(struct scsi_cmnd *cmd, int cnt)
+{
+       cmd->sdb.table.nents = cnt;
+}
+
+#else /* ! defined(SCSI_STRUCT_CHANGES) */
+
+static inline void scsi_set_buffer(struct scsi_cmnd *cmd, void *buffer)
+{
+       cmd->request_buffer = buffer;
+}
+
+#define set_scsi_sg_count(cmd, cnt)    ((cmd)->use_sg = (cnt))
+
+#ifndef        scsi_sg_count
+
+#define scsi_sg_count(cmd) ((cmd)->use_sg)
+#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer)
+#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
+
+static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid)
+{
+       cmd->resid = resid;
+}
+
+static inline int scsi_get_resid(struct scsi_cmnd *cmd)
+{
+       return cmd->resid;
+}
+
+#define scsi_for_each_sg(cmd, sg, nseg, __i)                    \
+       for_each_sg(scsi_sglist(cmd), sg, nseg, __i)
+
+#endif
+
+#ifndef        sg_page
+#define sg_page(x) ((x)->page)
+#endif
+
+#endif /* ! defined(SCSI_STRUCT_CHANGES) */
+
+#if defined(SCSI_TIMEOUT_CHANGES)
+#define timeout_per_command(cmd)       ((cmd)->request->timeout)
+#define vhba_reset_scsi_timeout(cmd, jiffies)  /* NOTHING */
+#else /* ! defined(SCSI_TIMEOUT_CHANGES) */
+#define timeout_per_command(cmd)       ((cmd)->timeout_per_command)
+#define vhba_reset_scsi_timeout(cmd, jiffies)                  \
+do {                                                           \
+       if ((cmd)->eh_timeout.function)                         \
+               mod_timer(&(cmd)->eh_timeout, jiffies);         \
+} while (0)
+#endif /* ! defined(SCSI_TIMEOUT_CHANGES) */
+
+#define        SET_OWNER(file) do { } while (0)
+
+/*
+ * In 2.6.31 added new netdev_ops in netdev
+ */
+#define SET_NETDEV_OPS(netdev, ops) \
+       ((netdev)->netdev_ops = (ops))
+
+#if !defined(HAS_SKB_ACCESS_FUNCTIONS)
+
+static inline struct tcphdr *tcp_hdr(const struct sk_buff *skb)
+{
+       return (struct tcphdr *)skb_transport_header(skb);
+}
+
+static inline unsigned int tcp_hdrlen(const struct sk_buff *skb)
+{
+       return skb->h.th->doff << 2;
+}
+
+static inline unsigned int tcp_optlen(const struct sk_buff *skb)
+{
+       return (skb->h.th->doff - 5) * 4;
+}
+
+static inline void skb_reset_network_header(struct sk_buff *skb)
+{
+       skb->nh.raw = skb->data;
+}
+#endif
+
+/*
+ * Backported NAPI changes; ESX 4.0 already supports them
+ */
+
+static inline void napi_update_budget(struct napi_struct *n, int cnt)
+{
+}
+
+#ifndef        NETIF_F_GRO
+#define        NETIF_F_GRO     0
+#endif
+
+#ifndef        NETIF_F_GSO
+#define        NETIF_F_GSO     0
+#endif
+
+#ifndef        IFF_SLAVE_INACTIVE
+#define        IFF_SLAVE_INACTIVE      0x4
+#endif
+
+#ifndef        CHECKSUM_PARTIAL
+#define        CHECKSUM_PARTIAL        CHECKSUM_HW
+#endif
+
+#if !defined(LLE) && defined(IB_VERBS_H)
+#if defined(NATIVE_IB_STACK_CHECK)
+enum rdma_link_layer {
+       IB_LINK_LAYER_UNSPECIFIED,
+       IB_LINK_LAYER_INFINIBAND,
+       IB_LINK_LAYER_ETHERNET,
+};
+
+static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
+{
+       memset(gid->raw, 0, 16);
+       *((u32 *) gid->raw) = cpu_to_be32(0xfe800000);
+       if (vid) {
+               gid->raw[12] = vid & 0xff;
+               gid->raw[11] = vid >> 8;
+       } else {
+               gid->raw[12] = 0xfe;
+               gid->raw[11] = 0xff;
+       }
+
+       memcpy(gid->raw + 13, mac + 3, 3);
+       memcpy(gid->raw + 8, mac, 3);
+       gid->raw[8] ^= 2;
+}
+#endif /* NATIVE_IB_STACK_CHECK */
+
+static inline enum rdma_link_layer rdma_port_link_layer(struct ib_device
+                                                       *device, u8 port_num)
+{
+       return IB_LINK_LAYER_INFINIBAND;
+}
+
+#endif /* ! defined(LLE) */
+
+#if defined(LLE) && defined(RDMA_PORT_LINK_LAYER_CHANGES)
+#define        rdma_port_link_layer rdma_port_get_link_layer
+#endif
+
+#define PROC_ROOT       0
+
+extern int xscore_uadm_init(void);
+extern void xscore_uadm_destroy(void);
+extern void xscore_uadm_receive(xsmp_cookie_t xsmp_hndl, u8 *data, int len);
+
+/* required for IB_REV_106 */
+#if !defined(IB_REV_106_CHECK) || !defined(IB_REV_110_CHECK)
+#define xg_vmk_kompat_init() do {} while (0)
+#define xg_vmk_kompat_cleanup() do {} while (0)
+#else
+extern int xg_vmk_kompat_init(void);
+extern void xg_vmk_kompat_cleanup(void);
+#endif
+
+#define VMWARE_RESERVED_KEYS ""
+#define SG_OFFSET(sg) (sg->offset)
+#define SG_LENGTH(sg) (sg->length)
+#define        SG_NEXT(sg) (sg++)
+#define        SG_RESET(sg) {}
+#define ib_sa_force_update(client, dev, attr, value, mode)  do {} while (0)
+
+#define        GET_NLINK(file)         ((file)->nlink)
+#define SET_NLINK(file, value) ((file)->nlink = (value))
+
+/*
+ * 8k IBMTU support
+ */
+enum xg_ib_mtu {
+       IB_MTU_8192 = 6
+};
+
+static inline int xg_ib_mtu_enum_to_int(enum ib_mtu _mtu)
+{
+       int mtu = (int)_mtu;
+
+       switch (mtu) {
+       case IB_MTU_256:
+               return 256;
+       case IB_MTU_512:
+               return 512;
+       case IB_MTU_1024:
+               return 1024;
+       case IB_MTU_2048:
+               return 2048;
+       case IB_MTU_4096:
+               return 4096;
+       case IB_MTU_8192:
+               return 8192;
+       default:
+               return -1;
+       }
+}
+#endif /* _XS_COMPAT_H */
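
As a usage note (not part of the patch), the SCSI compat accessors above
let vhba code walk a command's scatterlist the same way on old and new
kernels; a minimal sketch, where handle_fragment() is a placeholder:

    static void vhba_walk_sg(struct scsi_cmnd *cmd)
    {
            struct scatterlist *sg;
            int i;

            scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
                    /* SG_OFFSET()/SG_LENGTH() expand to sg->offset
                     * and sg->length; handle_fragment() is a
                     * hypothetical consumer */
                    handle_fragment(sg_page(sg), SG_OFFSET(sg),
                                    SG_LENGTH(sg));
            }
            scsi_set_resid(cmd, 0);
    }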
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xs_ud.c b/drivers/infiniband/ulp/xsigo/xscore/xs_ud.c
new file mode 100644 (file)
index 0000000..4efa7c6
--- /dev/null
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the UD send/receive stuff
+ */
+
+#include "xscore_priv.h"
+
+#define        XS_UD_RECV_WQE          16
+#define        XS_UD_SEND_WQE          8
+
+#define MAX_UD_RX_BUF_SIZE         1024
+#define MAX_UD_TX_BUF_SIZE         1024
+
+#define XSUD_RECV_WRID         0x10000
+#define XSUD_SEND_WRID         0x20000
+#define XSUD_WRID_MASK         0x30000
+
+#define QP_DEF_QKEY             0x11111111
+#define QP_MULTICAST_QPN        0xFFFFFF
+#define QP_MCAST_LID            0xC000
+
+struct ud_tx_buf {
+       void *vaddr;
+       u64 mapping;
+       struct ib_ah *ah;
+       int len;
+};
+
+struct ud_rx_buf {
+       void *vaddr;
+       int len;
+       u64 mapping;
+};
+
+/*
+ * This holds context information for UD
+ */
+struct ib_ud_ctx {
+       struct xscore_port *pinfop;
+       struct ib_cq *cq;
+       struct ib_qp *qp;
+       struct ud_rx_buf rxbuf[XS_UD_RECV_WQE];
+       struct ud_tx_buf txbuf[XS_UD_SEND_WQE];
+       int next_xmit;
+       void (*callback)(void *arg, void *msg, int len);
+       void *client_arg;
+};
+
+static int xs_ud_post_recv(struct ib_ud_ctx *ctx, int offset, int n)
+{
+       struct xscore_port *pinfop = ctx->pinfop;
+       struct ib_device *ca = pinfop->xs_dev->device;
+       struct ib_sge list = {
+               .lkey = pinfop->xs_dev->mr->lkey
+       };
+       struct ib_recv_wr wr = {
+               .sg_list = &list,
+               .num_sge = 1,
+       };
+       struct ib_recv_wr *bad_wr;
+       int i, ret;
+       void *addr;
+       u64 mapping;
+
+       for (i = 0; i < n; ++i, ++offset) {
+               struct ud_rx_buf *rbuf = &ctx->rxbuf[offset];
+
+               addr = kmalloc(MAX_UD_RX_BUF_SIZE, GFP_ATOMIC);
+               if (!addr) {
+                       ret = -ENOMEM;
+                       goto partial_failure;
+               }
+               rbuf->vaddr = addr;
+               /*
+                * Map the buffer and give the bus address
+                */
+               mapping = ib_dma_map_single(ca, addr, MAX_UD_RX_BUF_SIZE,
+                                           DMA_FROM_DEVICE);
+               if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+                       ret = -EIO;
+                       goto partial_failure;
+               }
+               rbuf->mapping = mapping;
+               list.addr = (unsigned long)mapping;
+               list.length = MAX_UD_RX_BUF_SIZE;
+               wr.wr_id = (int)(offset | XSUD_RECV_WRID);
+               ret = ib_post_recv(ctx->qp, &wr, &bad_wr);
+               if (ret) {
+                       pr_info("xs_ud_post_recv: ib_post_recv error, i %d, ret = %d\n",
+                               i, ret);
+                       goto partial_failure;
+               }
+       }
+       return 0;
+partial_failure:
+       for (; i >= 0; i--, offset--) {
+               struct ud_rx_buf *rbuf = &ctx->rxbuf[offset];
+
+               if (rbuf->mapping) {
+                       ib_dma_unmap_single(ca, rbuf->mapping,
+                                           MAX_UD_RX_BUF_SIZE,
+                                           DMA_FROM_DEVICE);
+                       rbuf->mapping = 0;
+               }
+               if (rbuf->vaddr != NULL) {
+                       kfree(rbuf->vaddr);
+                       rbuf->vaddr = 0;
+               }
+       }
+       return ret;
+}
+
+static void handle_wc(struct ib_ud_ctx *udp, struct ib_wc *wcp)
+{
+       void *buf;
+       struct ib_device *ca = udp->pinfop->xs_dev->device;
+       struct ud_tx_buf *tbuf;
+       struct ud_rx_buf *rbuf;
+       int ind = (int)wcp->wr_id & 0xFFFF;
+       int wrid = (int)wcp->wr_id & XSUD_WRID_MASK;
+
+       switch (wrid) {
+       case XSUD_SEND_WRID:
+               tbuf = &udp->txbuf[ind];
+               ib_destroy_ah(tbuf->ah);
+               ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len,
+                                   DMA_TO_DEVICE);
+               kfree(tbuf->vaddr);
+               tbuf->vaddr = 0;
+               tbuf->ah = 0;
+               tbuf->mapping = 0;
+               break;
+       case XSUD_RECV_WRID:
+               rbuf = &udp->rxbuf[ind];
+               ib_dma_unmap_single(ca, rbuf->mapping, MAX_UD_RX_BUF_SIZE,
+                                   DMA_FROM_DEVICE);
+               buf = rbuf->vaddr;
+               /*
+                * Allocate new buffer in its place
+                */
+               if ((wcp->status == 0) && udp->callback) {
+                       (void)xs_ud_post_recv(udp, ind, 1);
+                       /*
+                        * Get rid of the GRH header
+                        */
+                       udp->callback(udp->client_arg,
+                                     buf + sizeof(struct ib_grh),
+                                     wcp->byte_len - sizeof(struct ib_grh));
+               } else
+                       kfree(buf);
+               break;
+       default:
+               pr_warn("xscore: UD unknown WR id\n");
+               break;
+       }
+}
+
+static void ud_compl_handler(struct ib_cq *cq, void *cq_context)
+{
+       struct ib_ud_ctx *udp = cq_context;
+       struct ib_wc wc[1];
+       int i, n;
+
+       /*
+        * Enable interrupts back again
+        */
+       (void)ib_req_notify_cq(cq,
+                              IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+
+       while ((n = ib_poll_cq(cq, 1, wc)) > 0) {
+               for (i = 0; i < n; i++)
+                       handle_wc(udp, &wc[i]);
+       }
+}
+
+int xs_ud_send_msg(struct xscore_port *pinfop, uint8_t *macp, void *msgp,
+                  int len, int flags)
+{
+       struct ib_ud_ctx *udp = pinfop->ib_ud_ctx;
+       struct ib_device *ca = pinfop->xs_dev->device;
+       u64 mapping;
+       void *addr = msgp;
+       int i;
+       struct ib_sge list = {
+               .length = len,
+               .lkey = pinfop->xs_dev->mr->lkey
+       };
+       struct ib_send_wr wr = {
+               .sg_list = &list,
+               .num_sge = 1,
+               .opcode = IB_WR_SEND,
+               .send_flags = IB_SEND_SIGNALED,
+               .wr = {
+                      .ud = {
+                             .remote_qpn = QP_MULTICAST_QPN,
+                             .remote_qkey = QP_DEF_QKEY}
+                      }
+       };
+       struct ib_send_wr *bad_wr;
+       union ib_gid dgid;
+       struct ib_ah_attr ah_attr = {
+               .dlid = QP_MCAST_LID,
+               .sl = 0,
+               .src_path_bits = 0,
+               .port_num = pinfop->port_num
+       };
+       struct ud_tx_buf *tbuf;
+       int ret;
+
+       i = udp->next_xmit;
+       tbuf = &udp->txbuf[i];
+       if (tbuf->vaddr)
+               return -ENOBUFS;
+       if (flags & XS_UD_COPY_MSG) {
+               addr = kmalloc(len + 40, GFP_KERNEL);
+               if (!addr)
+                       return -ENOMEM;
+               memcpy(addr, msgp, len);
+       }
+       mapping = ib_dma_map_single(ca, addr, len + 40, DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+               if (flags & XS_UD_COPY_MSG)
+                       kfree(addr);
+               return -EIO;
+       }
+       tbuf->vaddr = addr;
+       tbuf->mapping = mapping;
+       tbuf->len = len + 40;
+       udp->next_xmit = (i + 1) % XS_UD_SEND_WQE;
+       list.addr = mapping;
+       wr.wr_id = i | XSUD_SEND_WRID;
+       /*
+        * Create an address handle and transmit the message
+        */
+       memset(&dgid, 0, sizeof(dgid));
+       /*
+        * Send it to the all-nodes IPv6 multicast address
+        * ff02::1
+        */
+       *((u32 *) dgid.raw) = cpu_to_be32(0xff020000);
+       dgid.raw[15] = 1;
+
+       ah_attr.grh.hop_limit = 1;
+       ah_attr.grh.dgid = dgid;
+       ah_attr.ah_flags = IB_AH_GRH;
+       tbuf->ah = ib_create_ah(pinfop->xs_dev->pd, &ah_attr);
+       if (IS_ERR(tbuf->ah)) {
+               XDDS_ERROR("%s: ib_create_ah failed, port: %d, index: %d\n",
+                          __func__, pinfop->port_num, i);
+               ret = PTR_ERR(tbuf->ah);
+               goto err;
+       }
+       wr.wr.ud.ah = tbuf->ah;
+       ret = ib_post_send(udp->qp, &wr, &bad_wr);
+       if (ret)
+               goto err1;
+       return 0;
+err1:
+       ib_destroy_ah(tbuf->ah);
+       tbuf->ah = 0;
+err:
+       tbuf->vaddr = 0;
+       ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len, DMA_TO_DEVICE);
+       tbuf->mapping = 0;
+       if (flags & XS_UD_COPY_MSG)
+               kfree(addr);
+       return ret;
+}
+
+int xs_ud_create(struct xscore_port *pinfop,
+                void (*callback)(void *, void *, int), void *arg)
+{
+       int ret = 0;
+       struct ib_ud_ctx *udp;
+       struct ib_qp_init_attr init_attr = {
+               .cap = {
+                       .max_send_wr = XS_UD_SEND_WQE + 1,
+                       .max_recv_wr = XS_UD_RECV_WQE + 1,
+                       .max_send_sge = 1,
+                       .max_recv_sge = 1},
+               .qp_type = IB_QPT_UD,
+       };
+       struct ib_qp_attr qp_attr = {
+               .qp_state = IB_QPS_INIT,
+               .pkey_index = 0,
+               .port_num = pinfop->port_num,
+               .qkey = QP_DEF_QKEY
+       };
+
+       /*
+        * Only do this once per port
+        */
+       if (pinfop->ib_ud_ctx != NULL)
+               return 0;
+
+       XDDS_INFO("%s: Creating guid: 0x%llx\n", __func__, pinfop->guid);
+
+       udp = kmalloc(sizeof(*udp), GFP_KERNEL);
+       if (!udp)
+               return -ENOMEM;
+       memset(udp, 0, sizeof(*udp));
+       udp->pinfop = pinfop;
+       udp->callback = callback;
+       udp->client_arg = arg;
+
+       pinfop->ib_ud_ctx = udp;
+       /*
+        * Create completion Q for send and receive (A single one is enough)
+        */
+       udp->cq = ib_create_cq(pinfop->xs_dev->device,
+                              ud_compl_handler, NULL,
+                              (void *)udp, XS_UD_RECV_WQE + XS_UD_SEND_WQE, 0);
+       if (IS_ERR(udp->cq)) {
+               ret = PTR_ERR(udp->cq);
+               XDDS_ERROR("%s: ib_create_cq, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_0;
+       }
+
+       init_attr.send_cq = udp->cq;
+       init_attr.recv_cq = udp->cq;
+
+       udp->qp = ib_create_qp(pinfop->xs_dev->pd, &init_attr);
+       if (IS_ERR(udp->qp)) {
+               ret = PTR_ERR(udp->qp);
+               XDDS_ERROR("%s: ib_create_qp, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_1;
+       }
+       /*
+        * Now move the QP to RTS state and post recvs
+        */
+       ret = ib_modify_qp(udp->qp, &qp_attr,
+                          IB_QP_STATE |
+                          IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY);
+       if (ret) {
+               XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_2;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTR;
+
+       ret = ib_modify_qp(udp->qp, &qp_attr, IB_QP_STATE);
+       if (ret) {
+               XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_2;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTS;
+       qp_attr.sq_psn = 0;
+
+       ret = ib_modify_qp(udp->qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
+       if (ret) {
+               XDDS_ERROR("%s: ib_modify_qp, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_2;
+       }
+       /*
+        * Now post recvs
+        */
+       ret = xs_ud_post_recv(udp, 0, XS_UD_RECV_WQE);
+       if (ret) {
+               XDDS_ERROR("%s: xs_ud_post_recv, port: %d, ret : %d\n",
+                          __func__, pinfop->port_num, ret);
+               goto err_2;
+       }
+
+       (void)ib_req_notify_cq(udp->cq,
+                              IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+
+       return 0;
+err_2:
+       ib_destroy_qp(udp->qp);
+err_1:
+       ib_destroy_cq(udp->cq);
+err_0:
+       kfree(udp);
+       pinfop->ib_ud_ctx = 0;
+       return ret;
+}
+
+void xs_ud_destroy(struct xscore_port *pinfop)
+{
+       struct ib_ud_ctx *udp = pinfop->ib_ud_ctx;
+       struct ib_device *ca = pinfop->xs_dev->device;
+       int i;
+
+       if (!udp)
+               return;
+       ib_destroy_qp(udp->qp);
+       ib_destroy_cq(udp->cq);
+       /*
+        * Flush out all buffers
+        */
+       for (i = 0; i < XS_UD_RECV_WQE; i++) {
+               struct ud_rx_buf *rbuf = &udp->rxbuf[i];
+
+               if (rbuf->mapping)
+                       ib_dma_unmap_single(ca, rbuf->mapping,
+                                           MAX_UD_RX_BUF_SIZE,
+                                           DMA_FROM_DEVICE);
+               if (rbuf->vaddr != NULL)
+                       kfree(rbuf->vaddr);
+       }
+       for (i = 0; i < XS_UD_SEND_WQE; i++) {
+               struct ud_tx_buf *tbuf = &udp->txbuf[i];
+
+               if (tbuf->mapping)
+                       ib_dma_unmap_single(ca, tbuf->mapping, tbuf->len,
+                                           DMA_TO_DEVICE);
+               if (tbuf->vaddr != NULL)
+                       kfree(tbuf->vaddr);
+       }
+       kfree(udp);
+}
+
+void xs_ud_free(void *msg)
+{
+       void *p = msg - sizeof(struct ib_grh);
+
+       XDDS_FUNCTION("%s: Freeing buffer: %p\n", __func__, p);
+       kfree(p);
+}
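
A hedged usage sketch of this UD path (the struct xscore_port pointer
and the payload are assumed; the macp argument is not referenced in the
code shown above, and XS_UD_COPY_MSG is the flag checked in
xs_ud_send_msg()):

    static void disc_cb(void *arg, void *msg, int len)
    {
            /* msg points past the GRH; release it with xs_ud_free() */
            xs_ud_free(msg);
    }

    static int start_discovery(struct xscore_port *port)
    {
            char msg[] = "hello";   /* placeholder discovery payload */
            int ret;

            /* once per port: create QP/CQ and post receive buffers */
            ret = xs_ud_create(port, disc_cb, port);
            if (ret)
                    return ret;

            /* XS_UD_COPY_MSG lets the caller keep ownership of msg */
            return xs_ud_send_msg(port, NULL, msg, sizeof(msg),
                                  XS_UD_COPY_MSG);
    }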
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xs_versions.h b/drivers/infiniband/ulp/xsigo/xscore/xs_versions.h
new file mode 100644 (file)
index 0000000..827b7ab
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XS_VERSIONS_H_INCLUDED__
+#define __XS_VERSIONS_H_INCLUDED__
+
+/*
+ * For simplicity, the following versions are defined as hex integers
+ *
+ * e.g. version string 2.4.5  will be 0x020405
+ *
+ * The max version string can be: 255.255.255 (0xffffff)
+ *
+ */
+
+/* Current Linux driver version */
+#define XSIGO_LINUX_DRIVER_VERSION     0x030000        /* 3.0.0 */
+
+/* The minimum xsigos version that works with above driver version */
+#define MINIMUM_XSIGOS_VERSION         0x010504        /* 1.5.4 */
+
+#endif /* __XS_VERSIONS_H_INCLUDED__ */
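
The hex encoding described above can be unpacked as in this illustrative
snippet (the macros are not part of the patch):

    /* 0x030000 -> "3.0.0": one byte per version component */
    #define XS_VER_MAJOR(v) (((v) >> 16) & 0xff)
    #define XS_VER_MINOR(v) (((v) >> 8) & 0xff)
    #define XS_VER_PATCH(v) ((v) & 0xff)

    pr_info("xsigo driver %d.%d.%d\n",
            XS_VER_MAJOR(XSIGO_LINUX_DRIVER_VERSION),
            XS_VER_MINOR(XSIGO_LINUX_DRIVER_VERSION),
            XS_VER_PATCH(XSIGO_LINUX_DRIVER_VERSION));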
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore.h b/drivers/infiniband/ulp/xsigo/xscore/xscore.h
new file mode 100644 (file)
index 0000000..8e92201
--- /dev/null
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSCORE_H_
+#define _XSCORE_H_
+
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+
+#include <linux/version.h>
+
+#include <rdma/ib_addr.h>
+
+#include "xs_compat.h"
+
+#define        XSCORE_PORT_UP          100
+#define        XSCORE_PORT_DOWN        101
+
+/* Support MAX of 4 PAGES */
+#define        XSCORE_MAX_RXFRAGS      4
+
+enum xscore_conn_state {
+       XSCORE_CONN_INIT = 1,
+       XSCORE_CONN_ERR,
+       XSCORE_CONN_CONNECTED,
+       XSCORE_CONN_LDISCONNECTED,
+       XSCORE_CONN_RDISCONNECTED,
+       XSCORE_DEVICE_REMOVAL,
+};
+
+struct xscore_port;
+struct xscore_desc;
+
+struct xscore_buf_info {
+       unsigned long addr;
+       void *cookie;
+       int sz;
+       int status;
+       unsigned long time_stamp;
+};
+
+struct xscore_conn_ctx {
+       /*
+        * These are public attributes which need to be set.
+        * These could be moved into a separate structure and copied
+        * over here XXX
+        */
+
+       int tx_ring_size;
+       int rx_ring_size;
+       int rx_buf_size;
+       /* In Interrupt mode coalescing parameters */
+       u32 tx_coalesce_usecs;
+       u32 tx_max_coalesced_frames;
+       u32 rx_coalesce_usecs;
+       u32 rx_max_coalesced_frames;
+       u32 features;
+#define        XSCORE_NO_SEND_COMPL_INTR       0x1
+#define        XSCORE_SG_SUPPORT               0x2
+#define        XSCORE_RDMA_SUPPORT             0x4
+#define        XSCORE_NO_RECV_COMPL_INTR       0x8
+#define        XSCORE_FMR_SUPPORT              0x10
+#define        XSCORE_DONT_FREE_SENDBUF        0x20
+#define        XSCORE_8K_IBMTU_SUPPORT         0x40
+#define        XSCORE_USE_CHECKSUM             (1 << 31)
+       void (*send_compl_handler)(void *client_arg, void *, int status,
+                                   int n);
+       void (*recv_msg_handler)(void *client_arg, void *, int sz, int status,
+                                 int n);
+       void (*recv_compl_handler)(void *client_arg);
+       void (*event_handler)(void *client_arg, int event);
+       u8 *(*alloc_buf)(void *client_arg, void **cookie, int sz);
+       struct page *(*alloc_page_bufs)(void *client_arg, void **cookie,
+                                        int *sz, int element);
+#define        XSCORE_SEND_BUF         1
+#define        XSCORE_RECV_BUF         2
+       void (*free_buf)(void *client_arg, void *cookie, int dir);
+       char priv_data[IB_CM_REQ_PRIVATE_DATA_SIZE];
+       int priv_data_len;
+       void *client_arg;
+       u64 service_id;
+       union ib_gid dgid;
+       u64 dguid;
+       u16 dlid;
+       int max_fmr_pages;
+       int fmr_pool_size;
+       u8 cm_timeout;
+       u8 cm_retries;
+       /*
+        * These are private attributes
+        */
+       spinlock_t lock;
+       struct mutex mlock;
+       enum xscore_conn_state state;
+       int status;
+       struct xscore_port *port;
+       struct ib_cm_id *cm_id;
+       struct ib_sa_path_rec path_rec;
+       struct ib_cq *scq;
+       struct ib_cq *rcq;
+       struct ib_qp *qp;
+       int local_qpn;
+       int remote_qpn;
+       struct ib_sge *tx_sge;
+       struct ib_fmr_pool *fmr_pool;
+       struct xscore_desc *tx_ring;
+       struct xscore_desc *rx_ring;
+       int next_xmit;
+       struct completion done;
+       int flags;
+#define        XSCORE_SYNCHRONOUS      0x1
+
+#define        XSCORE_NUM_RWC          128
+#define        XSCORE_NUM_SWC          8
+
+       struct ib_wc rwc[XSCORE_NUM_RWC];
+       int total_rwc;
+       int cur_rwc;
+       struct ib_wc swc[XSCORE_NUM_SWC];
+       int total_swc;
+       int cur_swc;
+};
+
+/*
+ * This bit is used to signal the soft-hca to defer processing when
+ * called in an interrupt-disabled context
+ */
+#define        XSCORE_DEFER_PROCESS    (1 << 31)
+
+int xscore_post_send_sg(struct xscore_conn_ctx *ctx, struct sk_buff *skb,
+                       int oflags);
+int xscore_post_send(struct xscore_conn_ctx *ctx, void *addr, int len,
+                    int flags);
+int xscore_enable_txintr(struct xscore_conn_ctx *ctx);
+int xscore_enable_rxintr(struct xscore_conn_ctx *ctx);
+int xscore_conn_connect(struct xscore_conn_ctx *ctx, int flags);
+void xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags);
+int xscore_conn_init(struct xscore_conn_ctx *ctx, struct xscore_port *port);
+void xscore_conn_destroy(struct xscore_conn_ctx *ctx);
+struct xscore_port *xscore_get_port(unsigned long hndl);
+int xscore_read_buf(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp);
+int xscore_poll_send(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp);
+int xscore_refill_recv(struct xscore_conn_ctx *ctx, int gfp_flags);
+u8 xscore_port_num(struct xscore_port *port);
+int xscore_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
+int xscore_wait_for_sessions(u8 calc_time);
+
+typedef void (*xcpm_receive_message_handler) (void *xsmp_hndl,
+                                             u8 *data, int length);
+typedef void (*xcpm_receive_event_handler) (void *xsmp_hndl, int event);
+typedef int (*xcpm_callout_event_handler) (char *name);
+
+enum xsmp_svc_state {
+       SVC_STATE_DOWN = 1,
+       SVC_STATE_UP,
+};
+
+struct xsmp_service_reg_info {
+       enum xsmp_svc_state svc_state;
+       xcpm_receive_message_handler receive_handler;
+       xcpm_receive_event_handler event_handler;
+       xcpm_callout_event_handler callout_handler;
+       u16 ctrl_message_type;
+       u16 resource_flag_index;
+       int flags;
+       atomic_t ref_cnt;
+};
+
+struct xsmp_session_info {
+       char chassis_name[64];
+       char session_name[64];
+       u32 version;
+       struct xscore_port *port;
+       struct ib_device *ib_device;
+       struct device *dma_device;
+       struct ib_pd *pd;
+       struct ib_mr *mr;
+       u8 is_shca;
+       u64 dguid;
+};
+
+/* extern declarations */
+extern u32 xcpm_resource_flags;
+extern int boot_flag;
+extern struct list_head xscore_port_list;
+extern int shca_csum;
+extern int xsigod_enable;
+extern char *os_version;
+extern char *os_arch;
+extern char hostname_str[];
+extern char system_id_str[];
+extern int xscore_create_procfs_entries(void);
+extern void xscore_remove_procfs_entries(void);
+extern void xcpm_port_add_proc_entry(struct xscore_port *port);
+extern void xcpm_port_remove_proc_entry(struct xscore_port *port);
+extern void xsmp_ulp_notify(struct xscore_port *p, int e);
+extern int xscore_wait_for_sessions(u8 calc_time);
+
+/*
+ * All XCPM service message functions
+ */
+
+int xsmp_sessions_match(struct xsmp_session_info *, void*);
+
+int xcpm_register_service(struct xsmp_service_reg_info *s_info);
+
+int xcpm_unregister_service(int service_id);
+
+int xcpm_send_message(void *xsmp_hndl, int service_id,
+                     u8 *data, int length);
+
+int xcpm_get_xsmp_session_info(void *xsmp_hndl,
+                              struct xsmp_session_info *ip);
+
+int xcpm_check_duplicate_names(void *xsmp_hndl, char *name, u8 svc_id);
+int xcpm_send_msg_xsigod(void *xsmp_hndl, void *msg, int len);
+
+void *xcpm_alloc_msg(int sz);
+
+void xcpm_free_msg(void *msg);
+
+int xcpm_is_xsigod_enabled(void);
+
+#endif /* _XSCORE_H_ */
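
A hedged sketch of the XCPM message helpers declared above (xsmp_hndl,
svc_id and len are assumed to be in scope; whether the sender must free
the buffer when xcpm_send_message() fails is an assumption):

    struct xsmp_session_info sinfo;

    if (xcpm_get_xsmp_session_info(xsmp_hndl, &sinfo) == 0) {
            u8 *msg = xcpm_alloc_msg(len);

            if (msg) {
                    /* fill in the XSMP payload here */
                    if (xcpm_send_message(xsmp_hndl, svc_id, msg, len))
                            xcpm_free_msg(msg);
            }
    }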
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_api.c b/drivers/infiniband/ulp/xsigo/xscore/xscore_api.c
new file mode 100644 (file)
index 0000000..8b00c27
--- /dev/null
@@ -0,0 +1,1573 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the XSCORE API used by client drivers
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/jiffies.h>
+
+#include "xscore.h"
+#include "xscore_priv.h"
+#include "xsmp.h"
+
+/*
+ * For now, to make the driver use checksum, and not iCRC, the user
+ * should set the following module parameters:
+ * # modprobe ib_xgc icrc_rx=1 icrc_tx=1
+ * # modprobe xscore shca_csum=1
+ * You'll need to do this on the chassis's shca too
+ */
+int shca_csum = 1;
+module_param(shca_csum, int, 0644);
+MODULE_PARM_DESC(shca_csum,
+"Set value to 1 to default the shca to use checksum instead of icrc32");
+
+struct xt_cm_private_data {
+       u64 vid;
+       u16 qp_type;
+       u16 max_ctrl_msg_size;
+       u32 data_qp_type;
+} __packed;
+
+struct xscore_desc {
+       dma_addr_t mapping;
+       dma_addr_t rxmapping[XSCORE_MAX_RXFRAGS];
+       void *vaddr;
+       size_t size;
+       dma_addr_t *sg_mapping;
+       struct sk_buff *skb;
+       struct page *page;
+       int flags;
+       unsigned long time_stamp;
+       enum dma_data_direction direction;
+};
+
+static int xscore_eth_mtu = IB_MTU_4096;
+module_param(xscore_eth_mtu, int, 0644);
+
+static int xscore_ib_mtu = IB_MTU_2048;
+module_param(xscore_ib_mtu, int, 0644);
+
+static int qp_retry_count = 6;
+module_param(qp_retry_count, int, 0644);
+
+static int qp_timeout = 16;
+module_param(qp_timeout, int, 0644);
+
+static int rdma_responder_resources = 16;
+
+module_param(rdma_responder_resources, int, 0644);
+
+static int xscore_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void _xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags);
+
+static void xscore_qp_event(struct ib_event *event, void *context)
+{
+       pr_err("QP event %d\n", event->event);
+}
+
+static void xscore_reset_rxdescriptor(struct xscore_desc *desc)
+{
+       desc->vaddr = 0;
+       desc->page = 0;
+       desc->skb = 0;
+       desc->sg_mapping = 0;
+}
+
+static int xscore_new_cm_id(struct xscore_conn_ctx *ctx)
+{
+       struct ib_cm_id *new_cm_id;
+
+       new_cm_id = ib_create_cm_id(ctx->port->xs_dev->device,
+                                   xscore_cm_handler, ctx);
+       if (IS_ERR(new_cm_id))
+               return PTR_ERR(new_cm_id);
+
+       if (ctx->cm_id)
+               ib_destroy_cm_id(ctx->cm_id);
+       ctx->cm_id = new_cm_id;
+
+       return 0;
+}
+
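+/*
+ * DMA-map an skb for transmit: the linear head (if any) fills tx_sge[0]
+ * and each paged fragment fills one of the following entries. A partial
+ * mapping failure unwinds all mappings done so far. Returns 0 with the
+ * total number of SG entries in *nfrags, or -EIO.
+ */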
+static int xs_dma_map_tx(struct xscore_conn_ctx *ctx,
+                        struct xscore_desc *desc, int *nfrags)
+{
+       struct xscore_port *port = ctx->port;
+       struct ib_device *ca = port->xs_dev->device;
+       struct sk_buff *skb = desc->skb;
+       dma_addr_t *mapping = desc->sg_mapping;
+       int i;
+       int off;
+       struct ib_sge *tx_sge = ctx->tx_sge;
+
+       if (skb_headlen(skb)) {
+               mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+                                              DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+                       return -EIO;
+               ib_dma_sync_single_for_device(ca, mapping[0],
+                                             skb_headlen(skb), DMA_TO_DEVICE);
+
+               off = 1;
+               tx_sge[0].addr = mapping[0];
+               tx_sge[0].length = skb_headlen(skb);
+               tx_sge[0].lkey = port->xs_dev->mr->lkey;
+       } else
+               off = 0;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               mapping[i + off] = ib_dma_map_page(ca, skb_frag_page(frag),
+                                                  frag->page_offset,
+                                                  skb_frag_size(frag),
+                                                  DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
+                       goto partial_error;
+               ib_dma_sync_single_for_device(ca, mapping[i + off],
+                                             frag->size, DMA_TO_DEVICE);
+               tx_sge[i + off].addr = mapping[i + off];
+               tx_sge[i + off].length = frag->size;
+               tx_sge[i + off].lkey = port->xs_dev->mr->lkey;
+       }
+       *nfrags = skb_shinfo(skb)->nr_frags + off;
+       return 0;
+
+partial_error:
+       for (; i > 0; --i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+               ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag),
+                                 DMA_TO_DEVICE);
+       }
+
+       if (off)
+               ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+                                   DMA_TO_DEVICE);
+
+       return -EIO;
+}
+
+static void xs_dma_unmap_tx(struct ib_device *ca, struct xscore_desc *desc)
+{
+       struct sk_buff *skb = desc->skb;
+       dma_addr_t *mapping = desc->sg_mapping;
+       int i;
+       int off;
+
+       if (skb_headlen(skb)) {
+               ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+                                   DMA_TO_DEVICE);
+               off = 1;
+       } else
+               off = 0;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag),
+                                 DMA_TO_DEVICE);
+       }
+}
+
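+/*
+ * Post a scatter/gather send of an skb on the connection's QP. Fails
+ * with -ENOTCONN when the connection is not up, and with -ENOBUFS when
+ * the TX ring slot at next_xmit still holds an skb whose completion
+ * has not been reaped yet.
+ */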
+int xscore_post_send_sg(struct xscore_conn_ctx *ctx, struct sk_buff *skb,
+                       int oflags)
+{
+       struct ib_send_wr wr, *bad_wr;
+       int ret;
+       int nfrags = 0;
+       struct xscore_desc *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ctx->lock, flags);
+
+       if (ctx->state != XSCORE_CONN_CONNECTED) {
+               ret = -ENOTCONN;
+               goto out;
+       }
+
+       desc = &ctx->tx_ring[ctx->next_xmit];
+       if (desc->skb) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       wr.next = NULL;
+       wr.wr_id = ctx->next_xmit;
+       wr.sg_list = ctx->tx_sge;
+       wr.opcode = IB_WR_SEND;
+       wr.send_flags = IB_SEND_SIGNALED;
+
+       desc->skb = skb;
+       /*
+        * perform DMA mapping of the SKB
+        */
+       ret = xs_dma_map_tx(ctx, desc, &nfrags);
+       if (unlikely(ret)) {
+               desc->skb = NULL;
+               goto out;
+       }
+
+       ctx->next_xmit = (ctx->next_xmit + 1) % ctx->tx_ring_size;
+
+       wr.num_sge = nfrags;
+
+       if (oflags & XSCORE_DEFER_PROCESS)
+               wr.send_flags |= XSCORE_DEFER_PROCESS;
+
+       spin_unlock_irqrestore(&ctx->lock, flags);
+       /* Note the Time stamp */
+       desc->time_stamp = jiffies;
+
+       ret = ib_post_send(ctx->qp, &wr, &bad_wr);
+
+       if (ret) {
+               xs_dma_unmap_tx(ctx->port->xs_dev->device, desc);
+               desc->skb = NULL;
+       }
+
+       IB_INFO("%s: ret %d, nxmit: %d, nfrags: %d\n", __func__,
+               ret, ctx->next_xmit, nfrags);
+       return ret;
+out:
+       spin_unlock_irqrestore(&ctx->lock, flags);
+       return ret;
+}
+EXPORT_SYMBOL(xscore_post_send_sg);
+
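+/*
+ * Post a single contiguous buffer as a one-entry SG list. The buffer
+ * is DMA-mapped here and unmapped from the send-completion path, or
+ * immediately when ib_post_send() fails.
+ */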
+int xscore_post_send(struct xscore_conn_ctx *ctx, void *addr, int len,
+                    int oflags)
+{
+       struct xscore_port *port = ctx->port;
+       struct ib_device *ca = port->xs_dev->device;
+       dma_addr_t mapping;
+       struct ib_sge list;
+       struct ib_send_wr wr, *bad_wr;
+       int ret = 0;
+       struct xscore_desc *desc;
+       unsigned long flags;
+
+       IB_INFO("%s: Addr: %p, Len: %d, DGUID: 0x%llx\n", __func__, addr,
+               len, ctx->dguid);
+
+       spin_lock_irqsave(&ctx->lock, flags);
+
+       if (ctx->state != XSCORE_CONN_CONNECTED) {
+               ret = -ENOTCONN;
+               goto out;
+       }
+
+       desc = &ctx->tx_ring[ctx->next_xmit];
+       if (desc->vaddr) {
+               ret = -ENOBUFS;
+               goto out;
+       }
+
+       mapping = ib_dma_map_single(ca, addr, len, DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(ca, mapping))) {
+               ret = -EIO;
+               goto out;
+       }
+
+       list.addr = mapping;
+       list.length = len;
+       list.lkey = port->xs_dev->mr->lkey;
+
+       wr.next = NULL;
+       wr.wr_id = ctx->next_xmit;
+       wr.sg_list = &list;
+       wr.num_sge = 1;
+       wr.opcode = IB_WR_SEND;
+       wr.send_flags = IB_SEND_SIGNALED;
+
+       ctx->next_xmit = (ctx->next_xmit + 1) % ctx->tx_ring_size;
+
+       if (oflags & XSCORE_DEFER_PROCESS)
+               wr.send_flags |= XSCORE_DEFER_PROCESS;
+
+       ib_dma_sync_single_for_device(ca, mapping, len, DMA_TO_DEVICE);
+
+       desc->vaddr = addr;
+       desc->mapping = mapping;
+       desc->size = len;
+       desc->skb = NULL;
+
+       spin_unlock_irqrestore(&ctx->lock, flags);
+
+       ret = ib_post_send(ctx->qp, &wr, &bad_wr);
+
+       spin_lock_irqsave(&ctx->lock, flags);
+
+       if (ret) {
+               ib_dma_unmap_single(ca, mapping, len, DMA_TO_DEVICE);
+               desc->vaddr = NULL;
+               desc->mapping = 0;
+       }
+out:
+       spin_unlock_irqrestore(&ctx->lock, flags);
+
+       IB_INFO("%s: ret %d, nxmit: %d\n", __func__, ret, ctx->next_xmit);
+
+       return ret;
+}
+EXPORT_SYMBOL(xscore_post_send);
+
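+/*
+ * Post 'n' receive buffers starting at ring index 'offset'. Buffers
+ * come from one of three sources, in order of preference: the client's
+ * page allocator (alloc_page_bufs), the client's buffer allocator
+ * (alloc_buf), or plain kmalloc(). When 'fillholes' is set only empty
+ * descriptors are refilled and the first error is returned; otherwise
+ * a failure unwinds everything posted so far.
+ */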
+static int xs_post_recv(struct xscore_conn_ctx *ctx, int offset, int n,
+                       int gfp_flags, int fillholes)
+{
+       struct xscore_port *port = ctx->port;
+       struct ib_device *ca = port->xs_dev->device;
+       struct ib_sge list[XSCORE_MAX_RXFRAGS];
+       struct ib_recv_wr wr;
+       struct ib_recv_wr *bad_wr;
+       int i, j, ret = 0;
+       dma_addr_t *mapping;
+       int rsize = ctx->rx_buf_size;
+
+       for (i = 0; i < n; ++i, ++offset) {
+               struct xscore_desc *desc = &ctx->rx_ring[offset];
+               void *addr = NULL;
+
+               j = 1;
+
+               if (fillholes && (desc->vaddr || desc->page || desc->skb))
+                       continue;
+
+               xscore_reset_rxdescriptor(desc);
+
+               mapping = desc->rxmapping;
+
+               if (ctx->alloc_page_bufs) {
+                       desc->page =
+                           ctx->alloc_page_bufs(ctx->client_arg,
+                                                (void **)&desc->page, &rsize,
+                                                i);
+                       if (!desc->page)
+                               ret = -ENOMEM;
+               } else if (ctx->alloc_buf) {
+                       addr =
+                           ctx->alloc_buf(ctx->client_arg, (void **)&desc->skb,
+                                          rsize);
+                       if (!addr)
+                               ret = -ENOMEM;
+               } else {
+                       addr = kmalloc(rsize, gfp_flags);
+                       if (!addr)
+                               ret = -ENOMEM;
+               }
+
+               if (ret == -ENOMEM) {
+                       if (fillholes)
+                               return ret;
+                       goto partial_failure;
+               }
+
+               desc->size = rsize;
+               /*
+                * Map the buffer and give the bus address
+                */
+               if (addr) {
+                       desc->vaddr = addr;
+                       mapping[0] = ib_dma_map_single(ca, addr, rsize,
+                                                      DMA_FROM_DEVICE);
+                       if (unlikely(ib_dma_mapping_error(ca, mapping[0]))) {
+                               ret = -EIO;
+                               if (fillholes)
+                                       return ret;
+                               goto partial_failure;
+                       }
+                       list[0].addr = mapping[0];
+                       list[0].length = rsize;
+                       list[0].lkey = port->xs_dev->mr->lkey;
+               } else {
+                       for (j = 0; j < (rsize / PAGE_SIZE); ++j) {
+                               /*
+                                * ESX does not allow referencing a page
+                                * descriptor through any form of pointer
+                                * arithmetic
+                                */
+                               mapping[j] =
+                                   ib_dma_map_page(ca, (desc->page + j), 0,
+                                                   PAGE_SIZE, DMA_FROM_DEVICE);
+                               if (unlikely
+                                   (ib_dma_mapping_error(ca, mapping[j]))) {
+                                       ret = -EIO;
+                                       for (; j > 0; --j)
+                                               ib_dma_unmap_page(ca,
+                                                       mapping[j - 1],
+                                                       PAGE_SIZE,
+                                                       DMA_FROM_DEVICE);
+                                       if (fillholes)
+                                               return ret;
+                                       goto partial_failure;
+                               }
+                               list[j].addr = mapping[j];
+                               list[j].length = PAGE_SIZE;
+                               list[j].lkey = port->xs_dev->mr->lkey;
+                       }
+               }
+
+               desc->sg_mapping = mapping;
+               wr.next = NULL;
+               wr.wr_id = (int)offset;
+               wr.sg_list = list;
+               wr.num_sge = j;
+               ret = ib_post_recv(ctx->qp, &wr, &bad_wr);
+               if (ret) {
+                       pr_err("xs_post_recv: ib_post_recv error,");
+                       pr_err("i = %d, ret = %d\n", i, ret);
+                       if (fillholes)
+                               return ret;
+                       goto partial_failure;
+               }
+       }
+       return 0;
+partial_failure:
+       pr_err("%s: Failed to allocate buffers\n", __func__);
+       for (; i >= 0; i--, offset--) {
+               struct xscore_desc *desc = &ctx->rx_ring[offset];
+
+               if (desc->sg_mapping) {
+                       if (desc->page) {
+                               for (j = 0; j < (rsize / PAGE_SIZE); ++j)
+                                       ib_dma_unmap_page(ca,
+                                                         desc->sg_mapping[j],
+                                                         PAGE_SIZE,
+                                                         DMA_FROM_DEVICE);
+                       } else {
+                               ib_dma_unmap_single(ca, desc->sg_mapping[0],
+                                                   rsize, DMA_FROM_DEVICE);
+                       }
+                       desc->sg_mapping = NULL;
+               }
+               if (desc->page || desc->vaddr || desc->skb) {
+                       if (ctx->free_buf)
+                               ctx->free_buf(ctx->client_arg,
+                                             desc->page ? desc->page :
+                                             (desc->skb ? desc->skb :
+                                              desc->vaddr),
+                                             XSCORE_RECV_BUF);
+                       else
+                               kfree(desc->vaddr);
+
+                       xscore_reset_rxdescriptor(desc);
+               }
+       }
+       return ret;
+}
+
+int xscore_refill_recv(struct xscore_conn_ctx *ctx, int gfp_flags)
+{
+       return xs_post_recv(ctx, 0, ctx->rx_ring_size, gfp_flags, 1);
+}
+EXPORT_SYMBOL(xscore_refill_recv);
+
+int xscore_enable_txintr(struct xscore_conn_ctx *ctx)
+{
+       return ib_req_notify_cq(ctx->scq,
+                               IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+}
+EXPORT_SYMBOL(xscore_enable_txintr);
+
+int xscore_enable_rxintr(struct xscore_conn_ctx *ctx)
+{
+       return ib_req_notify_cq(ctx->rcq,
+                               IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+}
+EXPORT_SYMBOL(xscore_enable_rxintr);
+
+static int _xscore_poll_send(struct xscore_conn_ctx *ctx)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc wc;
+       struct xscore_desc *desc;
+       int i;
+       int err = 0;
+       int ret;
+
+       IB_INFO("%s: Completion GUID: 0x%llx\n", __func__, ctx->dguid);
+
+       while ((ret = ib_poll_cq(ctx->scq, 1, &wc)) > 0) {
+               i = (int)wc.wr_id;
+               if (i >= ctx->tx_ring_size) {
+                       IB_ERROR("%s send completion error wr_id %d > %d\n",
+                                __func__, i, ctx->tx_ring_size);
+                       err++;
+                       break;
+               }
+               desc = &ctx->tx_ring[i];
+               if (desc->skb)
+                       xs_dma_unmap_tx(ca, desc);
+               else
+                       ib_dma_unmap_single(ca, desc->mapping, desc->size,
+                                           DMA_TO_DEVICE);
+
+               if (ctx->send_compl_handler)
+                       ctx->send_compl_handler(ctx->client_arg, desc->vaddr,
+                                               wc.status, i);
+               else if (ctx->free_buf)
+                       ctx->free_buf(ctx->client_arg,
+                                     desc->skb ? desc->skb : desc->vaddr,
+                                     XSCORE_SEND_BUF);
+               else if ((ctx->features & XSCORE_DONT_FREE_SENDBUF) == 0)
+                       kfree(desc->vaddr);
+
+               desc->mapping = 0;
+               desc->skb = NULL;
+               desc->vaddr = NULL;
+               if (wc.status) {
+                       err++;
+                       break;
+               }
+       }
+       if (!ret && !err)
+               return 0;
+       if (err)
+               return wc.status;
+       return ret;
+}
+
+static void xscore_send_completion(struct ib_cq *cq, void *ctx_ptr)
+{
+       struct xscore_conn_ctx *ctx = ctx_ptr;
+       int err;
+again:
+       err = _xscore_poll_send(ctx);
+       if (!err &&
+           ib_req_notify_cq(ctx->scq, IB_CQ_NEXT_COMP |
+                            IB_CQ_REPORT_MISSED_EVENTS) > 0)
+               goto again;
+}
+
+int xscore_poll_send(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc *wcp;
+       struct xscore_desc *desc;
+       int i;
+       int ret;
+
+       bp->status = 0;
+
+       /*
+        * Cache it here so that we do not go to IB stack every time
+        */
+       if (!ctx->total_swc) {
+               ret = ib_poll_cq(ctx->scq, XSCORE_NUM_SWC, &ctx->swc[0]);
+               if (ret > 0) {
+                       ctx->total_swc = ret;
+                       ctx->cur_swc = 0;
+               } else
+                       return ret;
+       }
+
+       ctx->total_swc--;
+       wcp = &ctx->swc[ctx->cur_swc++];
+       i = (int)wcp->wr_id;
+       if (i >= ctx->tx_ring_size) {
+               IB_ERROR("%s Send completion error wrid %d (> %d)\n",
+                        __func__, i, ctx->tx_ring_size);
+               return 0;
+       }
+       desc = &ctx->tx_ring[i];
+       if (desc->skb)
+               xs_dma_unmap_tx(ca, desc);
+       else
+               ib_dma_unmap_single(ca, desc->mapping, desc->size,
+                                   DMA_TO_DEVICE);
+       bp->addr = (unsigned long)desc->vaddr;
+       bp->sz = wcp->byte_len;
+       bp->cookie = desc->skb;
+       bp->time_stamp = desc->time_stamp;
+       desc->vaddr = NULL;
+       desc->skb = NULL;
+       desc->mapping = 0;
+       bp->status = wcp->status;
+       return 1;
+}
+EXPORT_SYMBOL(xscore_poll_send);
+
+int xscore_read_buf(struct xscore_conn_ctx *ctx, struct xscore_buf_info *bp)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc *wcp;
+       struct xscore_desc *desc;
+       int i, j;
+       int ret;
+
+       bp->status = 0;
+
+       /*
+        * Cache it here so that we do not go to IB stack every time
+        */
+       if (!ctx->total_rwc) {
+               ret = ib_poll_cq(ctx->rcq, XSCORE_NUM_RWC, &ctx->rwc[0]);
+               if (ret > 0) {
+                       ctx->total_rwc = ret;
+                       ctx->cur_rwc = 0;
+               } else
+                       return ret;
+       }
+       ret = 1;
+
+       ctx->total_rwc--;
+       wcp = &ctx->rwc[ctx->cur_rwc++];
+       i = (int)wcp->wr_id;
+       if (i >= ctx->rx_ring_size) {
+               IB_ERROR("%s completion event error with wrid %d (> %d)\n",
+                        __func__, i, ctx->rx_ring_size);
+               return 0;
+       }
+       desc = &ctx->rx_ring[i];
+       if (desc->page) {
+               for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+                       ib_dma_unmap_page(ca, desc->sg_mapping[j], PAGE_SIZE,
+                                         DMA_FROM_DEVICE);
+               bp->cookie = desc->page;
+       } else if (desc->skb || desc->vaddr) {
+               ib_dma_sync_single_for_cpu(ca, desc->sg_mapping[0], desc->size,
+                                          DMA_FROM_DEVICE);
+               ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+                                   DMA_FROM_DEVICE);
+               bp->addr = (unsigned long)desc->vaddr;
+               bp->cookie = desc->skb;
+       } else {
+               ret = 0;
+               goto out;
+       }
+
+       bp->sz = wcp->byte_len;
+       bp->status = wcp->status;
+out:
+       xscore_reset_rxdescriptor(desc);
+       return ret;
+}
+EXPORT_SYMBOL(xscore_read_buf);
+
+static int xscore_poll_recv(struct xscore_conn_ctx *ctx)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc wc;
+       struct xscore_desc *desc;
+       int i, j;
+       void *vaddr;
+       int size;
+       int err = 0;
+       int ret = 0;
+
+       while ((ret = ib_poll_cq(ctx->rcq, 1, &wc)) > 0) {
+               i = (int)wc.wr_id;
+               if (i >= ctx->rx_ring_size) {
+                       IB_ERROR("%s completion error with wr_id%d > size %d\n",
+                                __func__, i, ctx->rx_ring_size);
+                       err++;
+                       break;
+               }
+               desc = &ctx->rx_ring[i];
+               if (desc->page) {
+                       for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+                               ib_dma_unmap_page(ca, desc->sg_mapping[j],
+                                                 PAGE_SIZE, DMA_FROM_DEVICE);
+               } else if (desc->skb || desc->vaddr) {
+                       ib_dma_sync_single_for_cpu(ca, desc->sg_mapping[0],
+                                                  desc->size, DMA_FROM_DEVICE);
+                       ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+                                           DMA_FROM_DEVICE);
+               }
+               /*
+                * Post new buffer back
+                */
+               vaddr = desc->vaddr;
+               size = wc.byte_len;
+
+               xscore_reset_rxdescriptor(desc);
+
+               /*
+                * Call completion callback, pass buffer size
+                * and client arg and status
+                */
+               if (ctx->recv_msg_handler)
+                       ctx->recv_msg_handler(ctx->client_arg, vaddr, size,
+                                             wc.status, i);
+               /*
+                * If there is any error do not post anymore buffers
+                */
+               if (wc.status) {
+                       err++;
+                       break;
+               }
+               ctx->status = xs_post_recv(ctx, i, 1, GFP_ATOMIC, 0);
+       }
+       if (!ret && !err)
+               return 0;
+       if (err)
+               return wc.status;
+       return ret;
+}
+
+static void xscore_recv_completion(struct ib_cq *cq, void *ctx_ptr)
+{
+       struct xscore_conn_ctx *ctx = ctx_ptr;
+       int err;
+
+       if (ctx->recv_compl_handler) {
+               ctx->recv_compl_handler(ctx->client_arg);
+               return;
+       }
+again:
+       err = xscore_poll_recv(ctx);
+       if (!err &&
+           ib_req_notify_cq(ctx->rcq, IB_CQ_NEXT_COMP |
+                            IB_CQ_REPORT_MISSED_EVENTS) > 0)
+               goto again;
+}
+
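+/*
+ * Tear down a connection context: the CM id, the QP, both CQs (after
+ * draining pending completions so outstanding buffers are reclaimed),
+ * the TX/RX descriptor rings and the FMR pool.
+ */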
+void xscore_conn_destroy(struct xscore_conn_ctx *ctx)
+{
+       int i;
+
+       mutex_lock(&ctx->mlock);
+       if (ctx->cm_id && !IS_ERR(ctx->cm_id))
+               ib_destroy_cm_id(ctx->cm_id);
+       if (ctx->qp && !IS_ERR(ctx->qp))
+               ib_destroy_qp(ctx->qp);
+       ctx->qp = NULL;
+       /*
+        * Flush all recv and send completions
+        */
+       if (ctx->rcq && !IS_ERR(ctx->rcq)) {
+               if (ctx->recv_compl_handler)
+                       ctx->recv_compl_handler(ctx->client_arg);
+               else
+                       (void)xscore_poll_recv(ctx);
+               ib_destroy_cq(ctx->rcq);
+       }
+       ctx->rcq = NULL;
+       if (ctx->scq && !IS_ERR(ctx->scq)) {
+               (void)_xscore_poll_send(ctx);
+               ib_destroy_cq(ctx->scq);
+       }
+       ctx->scq = NULL;
+       kfree(ctx->tx_sge);
+       ctx->tx_sge = NULL;
+       if (ctx->tx_ring) {
+               for (i = 0; i < ctx->tx_ring_size; i++) {
+                       struct xscore_desc *desc = &ctx->tx_ring[i];
+
+                       kfree(desc->sg_mapping);
+                       desc->sg_mapping = NULL;
+               }
+               vfree(ctx->tx_ring);
+       }
+       ctx->tx_ring = NULL;
+       vfree(ctx->rx_ring);
+       ctx->rx_ring = NULL;
+       if (ctx->fmr_pool && !IS_ERR(ctx->fmr_pool))
+               ib_destroy_fmr_pool(ctx->fmr_pool);
+       ctx->fmr_pool = NULL;
+       mutex_unlock(&ctx->mlock);
+       mutex_destroy(&ctx->mlock);
+}
+EXPORT_SYMBOL(xscore_conn_destroy);
+
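+/*
+ * (Re)create the RC QP for this context, sized to the TX/RX rings.
+ * With XSCORE_SG_SUPPORT the number of send SG entries is capped at
+ * MAX_SKB_FRAGS + 1 (one extra for the linear head) or the device
+ * limit, whichever is smaller; otherwise a single entry is used.
+ */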
+static int xscore_create_qpset(struct xscore_conn_ctx *ctx)
+{
+       struct ib_qp_init_attr init_attr;
+       int ret = 0, max_sge;
+
+       if (ctx->qp && !IS_ERR(ctx->qp))
+               ib_destroy_qp(ctx->qp);
+
+       memset(&init_attr, 0, sizeof(init_attr));
+       init_attr.event_handler = xscore_qp_event;
+       init_attr.cap.max_send_wr = ctx->tx_ring_size;
+       init_attr.cap.max_recv_wr = ctx->rx_ring_size;
+       init_attr.cap.max_recv_sge = XSCORE_MAX_RXFRAGS;
+
+       max_sge = ctx->port->xs_dev->dev_attr.max_sge;
+       if (max_sge >= (MAX_SKB_FRAGS + 1))
+               max_sge = MAX_SKB_FRAGS + 1;
+
+       if (ctx->features & XSCORE_SG_SUPPORT)
+               init_attr.cap.max_send_sge = max_sge;
+       else
+               init_attr.cap.max_send_sge = 1;
+       init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+       init_attr.qp_type = IB_QPT_RC;
+       init_attr.send_cq = ctx->scq;
+       init_attr.recv_cq = ctx->rcq;
+
+       ctx->qp = ib_create_qp(ctx->port->xs_dev->pd, &init_attr);
+       if (IS_ERR(ctx->qp)) {
+               ret = PTR_ERR(ctx->qp);
+               IB_ERROR("%s ib_create_qp failed %d\n", __func__, ret);
+       }
+       if ((ctx->features & XSCORE_NO_SEND_COMPL_INTR) == 0)
+               ib_req_notify_cq(ctx->scq, IB_CQ_NEXT_COMP);
+       if ((ctx->features & XSCORE_NO_RECV_COMPL_INTR) == 0)
+               ib_req_notify_cq(ctx->rcq, IB_CQ_NEXT_COMP);
+       return ret;
+}
+
+static int create_fmr_pool(struct xscore_conn_ctx *ctx)
+{
+       struct xscore_port *port = ctx->port;
+
+       struct ib_fmr_pool_param pool_params = {
+               .max_pages_per_fmr = ctx->max_fmr_pages,
+               .access = IB_ACCESS_LOCAL_WRITE |
+                   IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
+               .pool_size = ctx->fmr_pool_size,
+               .dirty_watermark = 32,
+               .page_shift = 12,
+               .flush_function = NULL,
+               .flush_arg = NULL,
+               .cache = 1
+       };
+
+       ctx->fmr_pool = ib_create_fmr_pool(port->xs_dev->pd, &pool_params);
+       if (IS_ERR(ctx->fmr_pool))
+               return PTR_ERR(ctx->fmr_pool);
+       return 0;
+}
+
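+/*
+ * Derive the destination GID from the destination GUID: on IB links
+ * the local subnet prefix is combined with the GUID; on Ethernet a
+ * link-local GID is open-coded from the GUID's MAC bytes.
+ */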
+static void xscore_init_dest(struct xscore_conn_ctx *ctx)
+{
+       struct xscore_port *port = ctx->port;
+
+       if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               ctx->dgid.global.subnet_prefix =
+                   port->sgid.global.subnet_prefix;
+               ctx->dgid.global.interface_id = cpu_to_be64(ctx->dguid);
+       } else {
+               /*
+                * iboe_mac_vlan_to_ll(...) is not implemented in
+                * 3.10.0-123.el7.x86_64, so its logic is open-coded
+                * below; this is equivalent to
+                * iboe_mac_vlan_to_ll(&ctx->dgid, dmac, 0);
+                * Our driver uses only IB_LINK_LAYER_INFINIBAND.
+                */
+               u64 mac = ctx->dguid;
+               u8 dmac[6];
+               u16 vid = 0;
+               int i;
+               union ib_gid *gid = &ctx->dgid;
+
+               for (i = 0; i < 6; i++) {
+                       dmac[5 - i] = mac & 0xFF;
+                       mac >>= 8;
+               }
+               memset(gid->raw, 0, 16);
+               *((__be32 *) gid->raw) = cpu_to_be32(0xfe800000);
+               if (vid < 0x1000) {
+                       gid->raw[12] = vid & 0xff;
+                       gid->raw[11] = vid >> 8;
+               } else {
+                       gid->raw[12] = 0xfe;
+                       gid->raw[11] = 0xff;
+               }
+               memcpy(gid->raw + 13, dmac + 3, 3);
+               memcpy(gid->raw + 8, dmac, 3);
+               gid->raw[8] ^= 2;
+
+       }
+}
+
+int xscore_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
+{
+       return ib_modify_cq(cq, cq_count, cq_period);
+}
+EXPORT_SYMBOL(xscore_modify_cq);
+
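+/*
+ * Set up a connection context on the given port: destination GID,
+ * TX/RX descriptor rings, send/receive CQs, optional SG state and the
+ * FMR pool. A minimal client sequence looks roughly like this (the
+ * callback name is illustrative, not part of this API):
+ *
+ *     ctx->tx_ring_size = 256;
+ *     ctx->rx_ring_size = 256;
+ *     ctx->recv_msg_handler = my_recv_handler;
+ *     ret = xscore_conn_init(ctx, port);
+ *     if (!ret)
+ *             ret = xscore_conn_connect(ctx, XSCORE_SYNCHRONOUS);
+ */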
+int xscore_conn_init(struct xscore_conn_ctx *ctx, struct xscore_port *port)
+{
+       int i;
+       int ret = 0;
+
+       ctx->cm_id = NULL;
+       ctx->port = port;
+       ctx->next_xmit = 0;
+       ctx->fmr_pool = NULL;
+       ctx->total_rwc = 0;
+       ctx->cur_rwc = 0;
+       ctx->total_swc = 0;
+       ctx->cur_swc = 0;
+       spin_lock_init(&ctx->lock);
+       mutex_init(&ctx->mlock);
+       init_completion(&ctx->done);
+
+       xscore_init_dest(ctx);
+       /*
+        * Allocate descriptors
+        */
+       ctx->tx_ring = vzalloc(ctx->tx_ring_size * sizeof(struct xscore_desc));
+       if (!ctx->tx_ring)
+               return -ENOMEM;
+
+       ctx->rx_ring = vzalloc(ctx->rx_ring_size * sizeof(struct xscore_desc));
+       if (!ctx->rx_ring) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       ctx->scq = ib_create_cq(ctx->port->xs_dev->device,
+                               xscore_send_completion, NULL, ctx,
+                               ctx->tx_ring_size, 0);
+       if (IS_ERR(ctx->scq)) {
+               ret = PTR_ERR(ctx->scq);
+               IB_ERROR("%s ib_create_cq scq  failed %d\n", __func__, ret);
+               goto err;
+       }
+
+       ctx->rcq = ib_create_cq(ctx->port->xs_dev->device,
+                               xscore_recv_completion, NULL, ctx,
+                               ctx->rx_ring_size, 0);
+       if (IS_ERR(ctx->rcq)) {
+               ret = PTR_ERR(ctx->rcq);
+               IB_ERROR("%s ib_create_cq scq  failed %d\n", __func__, ret);
+               goto err;
+       }
+
+       if ((ctx->features & XSCORE_NO_SEND_COMPL_INTR) == 0) {
+               ib_req_notify_cq(ctx->scq, IB_CQ_NEXT_COMP);
+               if (!ctx->tx_max_coalesced_frames || !ctx->tx_coalesce_usecs)
+                       xscore_modify_cq(ctx->scq, ctx->tx_max_coalesced_frames,
+                                        ctx->tx_coalesce_usecs);
+       }
+
+       if ((ctx->features & XSCORE_NO_RECV_COMPL_INTR) == 0) {
+               ib_req_notify_cq(ctx->rcq, IB_CQ_NEXT_COMP);
+               if (!ctx->rx_max_coalesced_frames || !ctx->rx_coalesce_usecs)
+                       xscore_modify_cq(ctx->rcq, ctx->rx_max_coalesced_frames,
+                                        ctx->rx_coalesce_usecs);
+       }
+
+       if (ctx->features & XSCORE_SG_SUPPORT) {
+               ctx->tx_sge =
+                   kmalloc(sizeof(struct ib_sge) * (MAX_SKB_FRAGS + 1),
+                           GFP_KERNEL);
+               if (!ctx->tx_sge) {
+                       ret = -ENOMEM;
+                       goto err;
+               }
+               for (i = 0; i < ctx->tx_ring_size; i++) {
+                       struct xscore_desc *desc = &ctx->tx_ring[i];
+
+                       desc->sg_mapping =
+                           kmalloc(sizeof(dma_addr_t) * (MAX_SKB_FRAGS + 1),
+                                   GFP_KERNEL);
+                       if (!desc->sg_mapping) {
+                               ret = -ENOMEM;
+                               goto err;
+                       }
+               }
+       }
+       ret = create_fmr_pool(ctx);
+       if ((ctx->features & XSCORE_FMR_SUPPORT) && ret)
+               goto err;
+
+       return 0;
+err:
+       IB_ERROR("%s Error %d\n", __func__, ret);
+       xscore_conn_destroy(ctx);
+       return ret;
+}
+EXPORT_SYMBOL(xscore_conn_init);
+
+u8 xscore_port_num(struct xscore_port *port)
+{
+       return port->port_num;
+}
+EXPORT_SYMBOL(xscore_port_num);
+
+static void path_rec_complete(int status, struct ib_sa_path_rec *resp,
+                             void *context)
+{
+       struct xscore_conn_ctx *ctx = context;
+
+       IB_INFO("%s status %d\n", __func__, status);
+
+       if (status)
+               IB_ERROR("%s: completed with error %d\n", __func__, status);
+       else
+               memcpy(&ctx->path_rec, resp, sizeof(struct ib_sa_path_rec));
+       ctx->status = status;
+       complete(&ctx->done);
+}
+
+static int use_path_rec;
+
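+/*
+ * Build and send the CM REQ. The path record is either resolved
+ * through an SA query (use_path_rec, IB links only) or filled in
+ * locally from the port attributes, with MTU fixups for LLE gateways.
+ */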
+static int xscore_send_req(struct xscore_conn_ctx *ctx)
+{
+       struct ib_cm_req_param req;
+       struct ib_sa_path_rec path_rec;
+       struct ib_port_attr port_attr;
+       struct ib_sa_query *query;
+       u16 pkey;
+       int status;
+
+       memset(&req, 0, sizeof(req));
+
+       req.primary_path = &ctx->path_rec;
+       req.alternate_path = NULL;
+       req.service_id = ctx->service_id;
+       req.qp_num = ctx->qp->qp_num;
+       req.qp_type = ctx->qp->qp_type;
+       req.private_data = ctx->priv_data;
+       req.private_data_len = ctx->priv_data_len;
+       req.flow_control = 1;
+       req.starting_psn = 0;
+       req.peer_to_peer = 0;
+       req.initiator_depth = 1;
+
+       if (ctx->priv_data_len == sizeof(struct xt_cm_private_data)) {
+               struct xt_cm_private_data *pdata =
+                   (struct xt_cm_private_data *)ctx->priv_data;
+               if (ctx->port->xs_dev->is_shca && shca_csum) {
+                       ctx->features |= XSCORE_USE_CHECKSUM;
+                       pdata->data_qp_type =
+                           cpu_to_be32(be32_to_cpu(pdata->data_qp_type) |
+                                       shca_csum);
+               } else
+                       ctx->features &= ~XSCORE_USE_CHECKSUM;
+       }
+
+       if (ctx->features & XSCORE_RDMA_SUPPORT)
+               req.responder_resources = min
+                               (ctx->port->xs_dev->dev_attr.max_qp_rd_atom,
+                               rdma_responder_resources);
+       else
+               req.responder_resources = 1;
+
+       req.remote_cm_response_timeout = 20;
+       req.local_cm_response_timeout = 20;
+       if (ctx->cm_timeout) {
+               req.remote_cm_response_timeout = ctx->cm_timeout;
+               req.local_cm_response_timeout = ctx->cm_timeout;
+       }
+       req.retry_count = qp_retry_count;
+       req.rnr_retry_count = 7;
+       req.max_cm_retries = 1;
+
+       memset(&path_rec, 0, sizeof(path_rec));
+
+       /*
+        * Fill up path record information here
+        */
+       (void)ib_query_port(ctx->port->xs_dev->device, ctx->port->port_num,
+                           &port_attr);
+       path_rec.slid = cpu_to_be16(port_attr.lid);
+       path_rec.dlid = cpu_to_be16(ctx->dlid);
+       path_rec.sgid = ctx->port->sgid;
+       path_rec.dgid = ctx->dgid;
+       ib_query_pkey(ctx->port->xs_dev->device, ctx->port->port_num, 0, &pkey);
+       path_rec.pkey = cpu_to_be16(pkey);
+       path_rec.numb_path = 1;
+
+       if (use_path_rec && ctx->port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               /*
+                * If IB get path record from SA
+                */
+               status =
+                   ib_sa_path_rec_get(&xscore_sa_client,
+                                      ctx->port->xs_dev->device,
+                                      ctx->port->port_num, &path_rec,
+                                      IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID
+                                      | IB_SA_PATH_REC_PKEY |
+                                      IB_SA_PATH_REC_NUMB_PATH, 3000,
+                                      GFP_KERNEL, &path_rec_complete,
+                                      (void *)ctx, &query);
+
+               if (status) {
+                       IB_ERROR
+                           ("%s:ib_sa_path_rec_get completed with error %d\n",
+                            __func__, status);
+                       return status;
+               }
+
+               wait_for_completion(&ctx->done);
+               if (ctx->status) {
+                       IB_ERROR
+                           ("%s:wait_for_completion completed with error %d\n",
+                            __func__, ctx->status);
+                       return ctx->status;
+               }
+       } else {
+               req.primary_path = &path_rec;
+
+               if (ctx->port->link_layer == IB_LINK_LAYER_ETHERNET) {
+                       path_rec.mtu = port_attr.active_mtu;
+                       /*
+                        * LLE card has an issue where it reports
+                        * active MTU=4 for Jumbo and not 5
+                        */
+                       if (path_rec.mtu == 4)
+                               path_rec.mtu = 5;
+
+                       /*
+                        * 8k IB MTU support is for vNICs only
+                        */
+                       if (!(ctx->features & XSCORE_8K_IBMTU_SUPPORT)) {
+                               if (path_rec.mtu > xscore_eth_mtu)
+                                       path_rec.mtu = xscore_eth_mtu;
+                               if (xscore_eth_mtu > 5)
+                                       path_rec.mtu = 5;
+                       }
+
+                       path_rec.hop_limit = 2;
+               } else {
+                       path_rec.mtu = xscore_ib_mtu;
+                       path_rec.hop_limit = 0;
+               }
+               path_rec.reversible = 1;
+               path_rec.mtu_selector = 3;
+               path_rec.rate_selector = 2;
+               path_rec.rate = 3;
+               path_rec.packet_life_time_selector = 2;
+               path_rec.packet_life_time = 14;
+       }
+
+       init_completion(&ctx->done);
+       status = ib_send_cm_req(ctx->cm_id, &req);
+       if (status)
+               IB_ERROR("%s:ib_send_cm_req completed with error %d\n",
+                        __func__, status);
+       return status;
+}
+
+int xscore_conn_connect(struct xscore_conn_ctx *ctx, int flags)
+{
+       int ret;
+
+       IB_FUNCTION("%s: Connecting to 0x%llx, LID: 0x%x, SID: 0x%llx\n",
+                   __func__, ctx->dguid, ctx->dlid, ctx->service_id);
+
+       mutex_lock(&ctx->mlock);
+       _xscore_conn_disconnect(ctx, flags);
+       xscore_init_dest(ctx);
+       ret = xscore_create_qpset(ctx);
+       if (ret) {
+               IB_ERROR("%s xscore_create_qpset failed %d\n", __func__, ret);
+               mutex_unlock(&ctx->mlock);
+               return ret;
+       }
+       ctx->next_xmit = 0;
+
+       ret = xscore_new_cm_id(ctx);
+       if (ret) {
+               IB_ERROR("%s ib_create_cmid failed %d\n", __func__, ret);
+               ctx->cm_id = NULL;
+               mutex_unlock(&ctx->mlock);
+               return ret;
+       }
+       init_completion(&ctx->done);
+       ctx->flags |= flags;
+       ret = xscore_send_req(ctx);
+       if (ret) {
+               IB_ERROR("%s xscore_send_req failed %d\n", __func__, ret);
+               mutex_unlock(&ctx->mlock);
+               return ret;
+       }
+       /*
+        * If the user asked for synchronous completion, wait for the
+        * connection to be set up or to fail.
+        */
+       if (flags & XSCORE_SYNCHRONOUS)
+               wait_for_completion(&ctx->done);
+       ctx->flags &= ~flags;
+       mutex_unlock(&ctx->mlock);
+       if (flags & XSCORE_SYNCHRONOUS)
+               return ctx->status;
+       else
+               return ret;
+}
+EXPORT_SYMBOL(xscore_conn_connect);
+
+static void xscore_reclaim_recv_buffers(struct xscore_conn_ctx *ctx)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc wc;
+       struct xscore_desc *desc;
+       int i, j;
+
+       while (ib_poll_cq(ctx->rcq, 1, &wc) > 0) {
+               i = (int)wc.wr_id;
+               if (i >= ctx->rx_ring_size) {
+                       IB_ERROR("%s completion error with wrid %d (> %d)\n",
+                                __func__, i, ctx->rx_ring_size);
+                       break;
+               }
+               desc = &ctx->rx_ring[i];
+               if (!desc->page && !desc->vaddr && !desc->skb) {
+                       IB_ERROR("%s: Bad RCQ completion id: %d, qpn: %d\n",
+                                __func__, i, ctx->local_qpn);
+                       continue;
+               }
+
+               if (desc->page) {
+                       for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+                               ib_dma_unmap_page(ca, desc->sg_mapping[j],
+                                                 PAGE_SIZE, DMA_FROM_DEVICE);
+               } else if (desc->skb || desc->vaddr) {
+                       ib_dma_unmap_single(ca, desc->sg_mapping[0], desc->size,
+                                           DMA_FROM_DEVICE);
+               }
+
+               if (ctx->free_buf) {
+                       ctx->free_buf(ctx->client_arg,
+                                     desc->page ? desc->page :
+                                     (desc->skb ? desc->skb : desc->vaddr),
+                                     XSCORE_RECV_BUF);
+               } else {
+                       kfree(desc->vaddr);
+               }
+               xscore_reset_rxdescriptor(desc);
+
+       }
+       for (i = 0; i < ctx->rx_ring_size; ++i) {
+               desc = &ctx->rx_ring[i];
+
+               if (desc->page || desc->vaddr || desc->skb) {
+                       if (desc->page) {
+                               for (j = 0; j < (desc->size / PAGE_SIZE); ++j)
+                                       ib_dma_unmap_page(ca,
+                                                         desc->sg_mapping[j],
+                                                         PAGE_SIZE,
+                                                         DMA_FROM_DEVICE);
+                       } else if (desc->skb || desc->vaddr) {
+                               ib_dma_unmap_single(ca, desc->sg_mapping[0],
+                                                   desc->size,
+                                                   DMA_FROM_DEVICE);
+                       }
+                       if (ctx->free_buf) {
+                               ctx->free_buf(ctx->client_arg,
+                                             desc->page ? desc->page :
+                                             (desc->skb ? desc->skb :
+                                              desc->vaddr),
+                                             XSCORE_RECV_BUF);
+                       } else {
+                               kfree(desc->vaddr);
+                       }
+
+                       xscore_reset_rxdescriptor(desc);
+               }
+       }
+}
+
+static void xscore_reclaim_send_buffers(struct xscore_conn_ctx *ctx)
+{
+       struct ib_device *ca = ctx->port->xs_dev->device;
+       struct ib_wc wc;
+       struct xscore_desc *desc;
+       int i;
+
+       while (ib_poll_cq(ctx->scq, 1, &wc) > 0) {
+               i = (int)wc.wr_id;
+               if (i >= ctx->tx_ring_size) {
+                       IB_ERROR("%s Send completion error wrid %d (> %d)\n",
+                                __func__, i, ctx->tx_ring_size);
+                       break;
+               }
+               desc = &ctx->tx_ring[i];
+               if (desc->skb)
+                       xs_dma_unmap_tx(ca, desc);
+               else if (desc->vaddr)
+                       ib_dma_unmap_single(ca, desc->mapping, desc->size,
+                                           DMA_TO_DEVICE);
+               else {
+                       IB_ERROR("%s: Bad SCQ completion id: %d, qpn: %d\n",
+                                __func__, i, ctx->local_qpn);
+                       continue;
+               }
+               if (ctx->free_buf)
+                       ctx->free_buf(ctx->client_arg,
+                                     desc->skb ? desc->skb : desc->vaddr,
+                                     XSCORE_SEND_BUF);
+               else if ((ctx->features & XSCORE_DONT_FREE_SENDBUF) == 0)
+                       kfree(desc->vaddr);
+               desc->vaddr = NULL;
+               desc->skb = NULL;
+               desc->mapping = 0;
+       }
+       for (i = 0; i < ctx->tx_ring_size; ++i) {
+               desc = &ctx->tx_ring[i];
+
+               if (desc->vaddr || desc->skb) {
+                       if (desc->skb)
+                               xs_dma_unmap_tx(ca, desc);
+                       else
+                               ib_dma_unmap_single(ca, desc->mapping,
+                                                   desc->size, DMA_TO_DEVICE);
+                       if (ctx->free_buf)
+                               ctx->free_buf(ctx->client_arg,
+                                             desc->skb ? desc->skb :
+                                             desc->vaddr,
+                                             XSCORE_SEND_BUF);
+                       else if ((ctx->features & XSCORE_DONT_FREE_SENDBUF) ==
+                                0)
+                               kfree(desc->vaddr);
+                       desc->vaddr = NULL;
+                       desc->skb = NULL;
+                       desc->mapping = 0;
+               }
+       }
+}
+
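+/*
+ * Reset the QP, send a DREQ (waiting for the DREP when the caller
+ * asked for synchronous behavior), destroy the CM id so no callbacks
+ * are pending afterwards, and reclaim all outstanding send and
+ * receive buffers.
+ */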
+static void _xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int oflags)
+{
+       struct ib_qp_attr qp_attr;
+       unsigned long flags;
+
+       IB_FUNCTION("%s: Disconnecting to 0x%llx, LID: 0x%x\n",
+                   __func__, ctx->dguid, ctx->dlid);
+
+       qp_attr.qp_state = IB_QPS_RESET;
+       if (ctx->qp && !IS_ERR(ctx->qp))
+               (void)ib_modify_qp(ctx->qp, &qp_attr, IB_QP_STATE);
+
+       spin_lock_irqsave(&ctx->lock, flags);
+       ctx->state = XSCORE_CONN_INIT;
+       spin_unlock_irqrestore(&ctx->lock, flags);
+
+       init_completion(&ctx->done);
+       ctx->flags |= oflags;
+       if (ctx->cm_id && !ib_send_cm_dreq(ctx->cm_id, NULL, 0)) {
+               if (oflags & XSCORE_SYNCHRONOUS)
+                       wait_for_completion(&ctx->done);
+       }
+       ctx->flags &= ~oflags;
+       /*
+        * This guarantees no CM callbacks are pending after destroy
+        */
+       if (ctx->cm_id && !IS_ERR(ctx->cm_id))
+               ib_destroy_cm_id(ctx->cm_id);
+       ctx->cm_id = NULL;
+       IB_FUNCTION("%s: Disconnected from 0x%llx\n", __func__, ctx->dguid);
+       /*
+        * Reclaim all buffers back here
+        */
+
+       ctx->total_rwc = 0;
+       ctx->cur_rwc = 0;
+       ctx->total_swc = 0;
+       ctx->cur_swc = 0;
+
+       xscore_reclaim_send_buffers(ctx);
+       xscore_reclaim_recv_buffers(ctx);
+}
+
+void xscore_conn_disconnect(struct xscore_conn_ctx *ctx, int flags)
+{
+       mutex_lock(&ctx->mlock);
+       _xscore_conn_disconnect(ctx, flags);
+       mutex_unlock(&ctx->mlock);
+}
+EXPORT_SYMBOL(xscore_conn_disconnect);
+
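+/*
+ * React to a CM REP by driving the QP through INIT -> RTR -> RTS,
+ * posting the initial receive buffers before the move to RTS and
+ * finishing the handshake with an RTU.
+ */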
+static void handle_cm_rep(struct xscore_conn_ctx *ctx)
+{
+       struct ib_qp_attr qp_attr;
+       int attr_mask = 0;
+
+       memset(&qp_attr, 0, sizeof(qp_attr));
+       qp_attr.qp_state = IB_QPS_INIT;
+       ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+       if (ctx->status) {
+               IB_ERROR("ib_cm_init_qp_attr: QP to INIT\n");
+               return;
+       }
+       if (ctx->features & XSCORE_USE_CHECKSUM)
+               attr_mask |= XSCORE_USE_CHECKSUM;
+       ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+       if (ctx->status) {
+               IB_ERROR("ib: QP to INIT error\n");
+               return;
+       }
+
+       memset(&qp_attr, 0, sizeof(qp_attr));
+       qp_attr.qp_state = IB_QPS_RTR;
+       attr_mask = 0;
+       ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+       if (ctx->status) {
+               IB_ERROR("ib_cm_init_qp_attr: QP to RTR, status=%d\n",
+                        ctx->status);
+               return;
+       }
+
+       ctx->remote_qpn = qp_attr.dest_qp_num;
+       ctx->local_qpn = ctx->qp->qp_num;
+
+       if (ctx->features & XSCORE_RDMA_SUPPORT) {
+               attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC;
+               qp_attr.max_dest_rd_atomic =
+                   min(ctx->port->xs_dev->dev_attr.max_qp_rd_atom,
+                       rdma_responder_resources);
+       } else {
+               qp_attr.max_dest_rd_atomic = 4;
+       }
+
+       attr_mask |= IB_QP_MIN_RNR_TIMER;
+       qp_attr.min_rnr_timer = IB_RNR_TIMER_000_16;
+       /*
+        * Handle some attributes for LLE
+        */
+       if (ctx->port->link_layer == IB_LINK_LAYER_ETHERNET) {
+               attr_mask |= IB_QP_RQ_PSN;
+               qp_attr.rq_psn = 0;
+               attr_mask |= IB_QP_AV;
+               qp_attr.ah_attr.grh.dgid = ctx->dgid;
+               qp_attr.ah_attr.sl = 0;
+               qp_attr.ah_attr.port_num = ctx->port->port_num;
+               qp_attr.ah_attr.grh.hop_limit = 1;
+       }
+
+       ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+       if (ctx->status) {
+               IB_ERROR("ib_cm_modify_qp: QP to RTR error, status=%d\n",
+                        ctx->status);
+               return;
+       }
+       ctx->status = xs_post_recv(ctx, 0, ctx->rx_ring_size, GFP_KERNEL, 0);
+       if (ctx->status) {
+               IB_ERROR("ib: xs_post_recv error\n");
+               return;
+       }
+
+       memset(&qp_attr, 0, sizeof(qp_attr));
+       attr_mask = 0;
+       qp_attr.qp_state = IB_QPS_RTS;
+       ctx->status = ib_cm_init_qp_attr(ctx->cm_id, &qp_attr, &attr_mask);
+       if (ctx->status)
+               return;
+       attr_mask |= IB_QP_TIMEOUT;
+       qp_attr.timeout = qp_timeout;
+       ctx->status = ib_modify_qp(ctx->qp, &qp_attr, attr_mask);
+       if (ctx->status) {
+               IB_ERROR("ib: QP to RTS error\n");
+               return;
+       }
+       ctx->status = ib_send_cm_rtu(ctx->cm_id, NULL, 0);
+       if (ctx->status) {
+               IB_ERROR("ib: ib_send_cm_rtu error\n");
+               return;
+       }
+}
+
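+/*
+ * IB CM event dispatcher: updates the connection state, completes
+ * ctx->done for callers that requested synchronous semantics, and
+ * forwards the resulting state to the client's event handler.
+ */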
+static int xscore_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+       struct xscore_conn_ctx *ctx = cm_id->context;
+       int comp = 0;
+       struct ib_qp_attr qp_attr;
+       int cback = 1;
+
+       switch (event->event) {
+       case IB_CM_REQ_ERROR:
+               IB_INFO("%s IB_CM_REQ_ERROR DGUID 0x%llx\n", __func__,
+                       ctx->dguid);
+               ctx->state = XSCORE_CONN_ERR;
+               ctx->status = -ECONNRESET;
+               comp = 1;
+               break;
+       case IB_CM_REP_RECEIVED:
+               IB_INFO("%s IB_CM_REP_RCVD DGUID 0x%llx\n", __func__,
+                       ctx->dguid);
+               comp = 1;
+               /*
+                * Now handle CM rep from remote end
+                */
+               handle_cm_rep(ctx);
+               if (ctx->status)
+                       ctx->state = XSCORE_CONN_ERR;
+               else
+                       ctx->state = XSCORE_CONN_CONNECTED;
+               break;
+       case IB_CM_REJ_RECEIVED:
+               IB_INFO("%s IB_CM_REJ_RCVD DGUID 0x%llx", __func__, ctx->dguid);
+               IB_INFO(",reason: %d, ", event->param.rej_rcvd.reason);
+               IB_INFO("SID: 0x%llx\n", ctx->service_id);
+               comp = 1;
+               ctx->status = -ECONNRESET;
+               ctx->state = XSCORE_CONN_ERR;
+               break;
+       case IB_CM_DREQ_RECEIVED:
+               /*
+                * Handle this gracefully and try to re-connect
+                */
+               IB_INFO("%s IB_CM_DREQ_RCVD DGUID 0x%llx\n", __func__,
+                       ctx->dguid);
+               qp_attr.qp_state = IB_QPS_RESET;
+               (void)ib_modify_qp(ctx->qp, &qp_attr, IB_QP_STATE);
+               ib_send_cm_drep(ctx->cm_id, NULL, 0);
+               comp = 1;
+               ctx->state = XSCORE_CONN_RDISCONNECTED;
+               break;
+       case IB_CM_DREP_RECEIVED:
+               IB_INFO("%s IB_CM_DREP_RCVD DGUID 0x%llx\n", __func__,
+                       ctx->dguid);
+               comp = 1;
+               ctx->status = 0;
+               ctx->state = XSCORE_CONN_LDISCONNECTED;
+               break;
+       case IB_CM_DREQ_ERROR:
+               IB_INFO("%s IB_CM_DREQ_ERR DGUID 0x%llx\n", __func__,
+                       ctx->dguid);
+               comp = 1;
+               ctx->status = -ECONNRESET;
+               ctx->state = XSCORE_CONN_LDISCONNECTED;
+               break;
+       case IB_CM_TIMEWAIT_EXIT:
+               cback = 0;
+               break;
+       default:
+               cback = 0;
+               break;
+       }
+       if (comp && cback && (ctx->flags & XSCORE_SYNCHRONOUS))
+               complete(&ctx->done);
+       if (ctx->event_handler && cback)
+               ctx->event_handler(ctx->client_arg, ctx->state);
+       return 0;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_impl.c b/drivers/infiniband/ulp/xsigo/xscore/xscore_impl.c
new file mode 100644 (file)
index 0000000..3e8f7a2
--- /dev/null
@@ -0,0 +1,1061 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements XDS/XDDS protocol as well as XSMP protocol
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/jiffies.h>
+#include <linux/proc_fs.h>
+
+#include "xscore_priv.h"
+#include "xs_compat.h"
+#include "xs_versions.h"
+#include "xscore.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define XSCORE_VERSION "Unknown"
+#error "No Version"
+#else
+#define XSCORE_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("OVN core driver");
+MODULE_VERSION(XSCORE_VERSION);
+
+int xscore_debug;
+module_param(xscore_debug, int, 0644);
+
+int xscore_force_sm_change;
+module_param(xscore_force_sm_change, int, 0644);
+int xscore_notify_ulps;
+module_param(xscore_notify_ulps, int, 0644);
+
+char hostname_str[XSIGO_MAX_HOSTNAME + 1];
+char system_id_str[64];
+
+static char *hostname;
+module_param(hostname, charp, 0444);
+static char *system_id;
+module_param(system_id, charp, 0444);
+
+char *os_version;
+module_param(os_version, charp, 0444);
+char *os_arch;
+module_param(os_arch, charp, 0444);
+
+#if defined(INDIVIDUAL_HEAPS)
+vmk_heapid ib_basic_heapid;
+#endif
+
+struct ib_sa_client xscore_sa_client;
+struct list_head xscore_port_list;
+struct mutex xscore_port_mutex;
+
+static void xscore_add_one(struct ib_device *device);
+static void xscore_remove_one(struct ib_device *device);
+static void xds_send_handler(struct ib_mad_agent *agent,
+                            struct ib_mad_send_wc *mad_send_wc);
+static void xds_recv_handler(struct ib_mad_agent *mad_agent,
+                            struct ib_mad_recv_wc *mad_recv_wc);
+static int xds_check_xcm_record(struct xscore_port *port,
+                               struct ib_xds_mad *xds_mad);
+static void xscore_port_event_handler(struct work_struct *work);
+
+static struct ib_client xscore_client = {
+       .name = "xscore",
+       .add = xscore_add_one,
+       .remove = xscore_remove_one
+};
+
+/*
+ * Initialize XDS mad agent to send and receive XDS query
+ */
+static int xscore_init_mad_agent(struct xscore_port *port)
+{
+       struct xscore_dev *xs_dev = port->xs_dev;
+       struct ib_mad_reg_req mad_reg_req;
+
+       memset(&mad_reg_req, 0, sizeof(struct ib_mad_reg_req));
+       mad_reg_req.mgmt_class = XSIGO_MGMT_CLASS;
+       mad_reg_req.mgmt_class_version = XSIGO_MGMT_CLASS_VERSION;
+       set_bit(IB_MGMT_METHOD_GET, mad_reg_req.method_mask);
+
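+	/* The GSI (QP1) carries general management MADs such as the XDS query */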
+       port->mad_agent = ib_register_mad_agent(xs_dev->device,
+                                               port->port_num, IB_QPT_GSI,
+                                               &mad_reg_req, 0,
+                                               xds_send_handler,
+                                               xds_recv_handler, (void *)port,
+                                               0);
+
+       if (IS_ERR(port->mad_agent)) {
+		IB_ERROR("Failure registering MAD handle for port %d, GUID: 0x%llx\n",
+			 port->port_num, port->guid);
+               return PTR_ERR(port->mad_agent);
+       }
+       return 0;
+}
+
+/*
+ * This is the callback for service record query by the IB MAD layer
+ */
+static void service_rec_callback(int status, struct ib_sa_service_rec *resp,
+                                void *context)
+{
+       struct xscore_port *port = (struct xscore_port *)context;
+
+       if (!status && resp) {
+               port->xds_lid = be16_to_cpu(resp->data16[0]);
+               port->xds_guid = be64_to_cpu(resp->data64[0]);
+       } else {
+		XDS_INFO("service_rec_callback: failed, code: %d, port %d, GUID: 0x%llx\n",
+			 status, port->port_num, port->guid);
+               port->counters[PORT_XDS_SA_QUERY_TOUT_COUNTER]++;
+               set_bit(XSCORE_FORCE_SM_CHANGE, &port->flags);
+       }
+       port->sa_query_status = status;
+	/*
+	 * Wake up the thread waiting in xscore_query_svc_record()
+	 */
+	XDS_INFO("service_rec_callback: completed, code: %d, GUID: 0x%llx\n",
+		 status, port->guid);
+       complete(&port->sa_query_done);
+}
+
+static void xdds_msg_handler(struct work_struct *work)
+{
+       struct xdds_work *xwork = container_of(work, struct xdds_work,
+                                              work);
+       struct xdp_hdr *msghdr = (struct xdp_hdr *)xwork->msg;
+
+       xscore_set_wq_state(XSCORE_WQ_XDDS_HANDLER);
+       switch (ntohs(msghdr->type)) {
+
+       case XDP_MSG_TYPE_DISC_SOL:
+
+               /* Unicast from chassis (xcfm info) */
+               if (ntohs(msghdr->flags) & XDP_FLAGS_RSP) {
+                       struct ib_xds_mad xds_mad;
+
+                       memset(&xds_mad, 0, sizeof(struct ib_xds_mad));
+                       memcpy(xds_mad.data,
+                              xwork->msg + sizeof(struct xdp_hdr),
+                              sizeof(struct xcm_list));
+                       /*
+                        * Now call XCM list handling routine
+                        */
+                       xds_check_xcm_record(xwork->port, &xds_mad);
+               }
+               break;
+       default:
+		XDDS_ERROR("%s: Port GUID: 0x%llx, unexpected protocol type %d\n",
+			   __func__, xwork->port->guid, ntohs(msghdr->type));
+               break;
+       }
+       xs_ud_free(xwork->msg);
+       kfree(xwork);
+       xscore_clear_wq_state(XSCORE_WQ_XDDS_HANDLER);
+}
+
+static void xs_ud_callback(void *arg, void *msg, int len)
+{
+       struct xscore_port *port = arg;
+       struct xdds_work *xwork;
+       unsigned long flags;
+
+       /*
+        * Grab spin lock and check for SHUTDOWN state
+        */
+	spin_lock_irqsave(&port->lock, flags);
+	if (test_bit(XSCORE_PORT_SHUTDOWN, &port->flags))
+		goto out;
+	xwork = kzalloc(sizeof(struct xdds_work), GFP_ATOMIC);
+	if (!xwork)
+		goto out;
+	xwork->msg = (u8 *)msg;
+	xwork->msg_len = len;
+	xwork->port = port;
+	INIT_WORK(&xwork->work, xdds_msg_handler);
+	queue_work(port->port_wq, &xwork->work);
+	spin_unlock_irqrestore(&port->lock, flags);
+	return;
+out:
+	/* Message is dropped on shutdown or allocation failure */
+	xs_ud_free(msg);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+#define XSCORE_SA_QUERY_TIMEOUT	(3 * 1000)	/* milliseconds */
+
+/*
+ * This function queries the SA for the XDS service record. It is synchronous
+ * and must be called from thread/workqueue context
+ */
+
+int xscore_query_svc_record(struct xscore_port *port)
+{
+       struct xscore_dev *xs_dev = port->xs_dev;
+       struct ib_sa_service_rec service_rec;
+       struct ib_sa_query *query;
+       struct ib_port_attr attr;
+       int ret;
+
+       memset(&service_rec, 0, sizeof(service_rec));
+       strcpy(service_rec.name, "XSIGOXDS");
+       init_completion(&port->sa_query_done);
+
+       if (xscore_notify_ulps || (xscore_force_sm_change &&
+                                  test_and_clear_bit(XSCORE_FORCE_SM_CHANGE,
+                                                     &port->flags))) {
+               XDS_INFO("ib_sa_force_update: port %d GUID: 0x%llx\n",
+                        port->port_num, port->guid);
+               attr.sm_lid = port->sm_lid;
+               attr.lid = port->lid;
+               /* mode = 1 Notify ULPs about IB events */
+               ib_sa_force_update(&xscore_sa_client,
+                                  xs_dev->device, &attr, port->port_num,
+                                  xscore_notify_ulps);
+       }
+       port->rec_poller_state = XDS_RECP_SAUPDATE_DONE;
+
+       ret = ib_sa_service_rec_query(&xscore_sa_client,
+                                     xs_dev->device, port->port_num,
+                                     IB_MGMT_METHOD_GET, &service_rec,
+                                     IB_SA_SERVICE_REC_SERVICE_NAME,
+                                     XSCORE_SA_QUERY_TIMEOUT, GFP_KERNEL,
+                                     &service_rec_callback, port, &query);
+       port->rec_poller_state = XDS_RECP_SAREC_DONE;
+       if (ret) {
+		XDS_INFO("ib_sa_service_rec_query: failed, ret %d, port: %d, GUID: 0x%llx\n",
+			 ret, port->port_num, port->guid);
+               port->counters[PORT_XDS_SA_QUERY_ERROR_COUNTER]++;
+               return ret;
+       }
+       port->counters[PORT_XDS_SA_QUERY_COUNTER]++;
+	/*
+	 * Safety net in case the completion callback never runs; the SA
+	 * layer should always invoke it, so this should never trigger.
+	 */
+       if (!wait_for_completion_timeout(&port->sa_query_done,
+                                        msecs_to_jiffies
+                                        (XSCORE_SA_QUERY_TIMEOUT * 10))) {
+		XDS_ERROR("%s: completion timeout, port: %d, GUID: 0x%llx\n",
+                         __func__, port->port_num, port->guid);
+               return -ETIMEDOUT;
+       }
+       return port->sa_query_status;
+}
+
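+/*
+ * Fill in the common MAD header for an XDS query.  The 64-bit TID packs
+ * the agent's hi_tid in the upper half and the port number in the lower
+ * half so a response can be matched back to this port.
+ */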
+static void create_ib_mad_header(struct xscore_port *port,
+                                struct ib_xds_mad *xds_mad)
+{
+       struct ib_mad_hdr *mad_hdr = &xds_mad->mad_hdr;
+
+       mad_hdr->base_version = IB_MGMT_BASE_VERSION;
+       mad_hdr->mgmt_class = XSIGO_MGMT_CLASS;
+       mad_hdr->class_version = XSIGO_MGMT_CLASS_VERSION;
+       mad_hdr->method = IB_MGMT_METHOD_GET;
+       mad_hdr->attr_id = cpu_to_be16(IB_MAD_ATTR_XCM_REQUEST);
+       mad_hdr->tid = port->mad_agent->hi_tid;
+       mad_hdr->tid <<= 32;
+       mad_hdr->tid |= port->port_num;
+       mad_hdr->tid = cpu_to_be64(mad_hdr->tid);
+}
+
+/*
+ * Capability word layout: bit 31 must always be set; bits 16..30 carry
+ * per-resource capability flags (bit 16 = VNIC, bit 17 = VHBA); bits
+ * 0..15 are unused.
+ */
+static void xds_send_cap_info(struct xds_request *request)
+{
+       uint32_t cap_info, i;
+
+	cap_info = 1U << 31;
+
+       for (i = 0; i < RESOURCE_FLAG_INDEX_MAX; i++) {
+               if (xcpm_resource_flags & (1 << i))
+                       cap_info = (cap_info & 0xffff0000) | (1 << (16 + i));
+       }
+       request->reserved = htonl(cap_info);
+}
+
+/*
+ * Create a XDS query packet
+ */
+static void create_xds_mad_req(struct xscore_port *port,
+                              struct xds_request *request)
+{
+	char h[16 + 1];
+       char tmp_os_version[64];
+       unsigned long system_id_ul;
+       int ret;
+
+       request->server_record.port_id = cpu_to_be64(port->guid);
+       strncpy(request->hostname, hostname_str, XSIGO_MAX_HOSTNAME);
+       snprintf(tmp_os_version, sizeof(tmp_os_version) - 1, "%s:xg-%s",
+                init_utsname()->release, XSCORE_VERSION);
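+	/*
+	 * Report "kernel-release:xg-version" in os_version; if the combined
+	 * string does not fit, split it across os_version and build_version.
+	 */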
+       if (strlen(tmp_os_version) >= sizeof(request->os_version)) {
+               snprintf(request->os_version, sizeof(request->os_version) - 1,
+                        "%s", init_utsname()->release);
+               snprintf(request->build_version,
+                        sizeof(request->build_version) - 1, "xg-%s",
+                        XSCORE_VERSION);
+       } else {
+               snprintf(request->os_version, sizeof(request->os_version) - 1,
+                        "%s:xg-%s", init_utsname()->release, XSCORE_VERSION);
+       }
+	strncpy(request->os_arch, init_utsname()->machine,
+		sizeof(request->os_arch) - 1);
+       request->os_type = htonl(RESOURCE_OS_TYPE_LINUX);
+       request->os_version[sizeof(request->os_version) - 1] = 0;
+       request->os_arch[sizeof(request->os_arch) - 1] = 0;
+
+       request->fw_version = cpu_to_be64(port->xs_dev->fw_ver);
+       request->hw_version = htonl(port->xs_dev->hw_ver);
+       request->driver_version = htonl(XSIGO_LINUX_DRIVER_VERSION);
+       if (system_id_str[0]) {
+               ret = kstrtoul(system_id_str + 16, 16, &system_id_ul);
+               request->system_id_l = cpu_to_be64(system_id_ul);
+               memcpy(h, system_id_str, 16);
+               h[16] = 0;
+               ret = kstrtoul(h, 16, &system_id_ul);
+               request->system_id_h = cpu_to_be64(system_id_ul);
+       }
+       xds_send_cap_info(request);
+}
+
+/*
+ * Send completion handler for XDS query
+ */
+static void xds_send_handler(struct ib_mad_agent *agent,
+                            struct ib_mad_send_wc *mad_send_wc)
+{
+       struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
+       struct xscore_port *port = agent->context;
+
+       XDS_INFO("%s, Unmapping send buffer: status %d, Port GUID: 0x%llx\n",
+                __func__, mad_send_wc->status, port->guid);
+
+       ib_destroy_ah(msg->ah);
+       ib_free_send_mad(msg);
+}
+
+static int xds_check_xcm_record(struct xscore_port *port,
+                               struct ib_xds_mad *xds_mad)
+{
+       struct xcm_list list;
+       int i;
+
+       XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+       /*
+        * Skip server_info structure size in response
+        */
+       memcpy(&list, xds_mad->data + sizeof(struct server_info), sizeof(list));
+
+       XDS_INFO("%s: port 0x%llx, XCM list count %d\n", __func__,
+                port->guid, list.count);
+
+       if (list.count > MAX_XCFM_COUNT) {
+               /*
+                * Print error
+                */
+               XDS_ERROR("%s GUID: 0x%llx, list count range error %d\n",
+                         __func__, port->guid, list.count);
+               return -EINVAL;
+       }
+       if (list.count && list.xcm_version != XCM_REC_VERSION) {
+               XDS_ERROR("%s GUID: 0x%llx, Bad XCM version %d\n",
+                         __func__, port->guid, list.xcm_version);
+               return -EINVAL;
+       }
+
+       for (i = 0; i < list.count; i++) {
+               u64 dguid;
+               u16 dlid;
+               /*
+                * Go through all the XSMP sessions and verify for any duplicate
+                */
+               struct xcfm_record *xcmp = &list.xcms[i];
+
+               dguid = be64_to_cpu(xcmp->port_id);
+               dlid = be16_to_cpu(xcmp->xcm_lid);
+               XDS_INFO("Port GUID: 0x%llx, XCM lid: 0x%x, XCM guid: 0x%llx\n",
+                        port->guid, dlid, dguid);
+               xsmp_allocate_xsmp_session(port, dguid, dlid);
+       }
+       if (list.count) {
+               port->counters[PORT_XDS_LIST_COUNT_COUNTER]++;
+               set_bit(XSCORE_SP_PRESENT, &port->flags);
+               clear_bit(XSCORE_SP_NOT_PRESENT, &port->flags);
+       } else {
+               port->counters[PORT_XDS_LIST_COUNT_ZERO_COUNTER]++;
+               set_bit(XSCORE_SP_NOT_PRESENT, &port->flags);
+               clear_bit(XSCORE_SP_PRESENT, &port->flags);
+       }
+
+       return 0;
+}
+
+/*
+ * Receive completion handler for XDS query
+ */
+static void xds_recv_handler(struct ib_mad_agent *mad_agent,
+                            struct ib_mad_recv_wc *mad_recv_wc)
+{
+       struct xscore_port *port = mad_agent->context;
+
+       XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+       port->counters[PORT_XDS_XDS_QUERY_COUNTER]++;
+       port->mad_recv_wc = mad_recv_wc;
+       complete(&port->xds_query_done);
+}
+
+/*
+ * This routine queries the XDS for the XCM record. It is synchronous and
+ * must be called from thread/workqueue context
+ */
+int xscore_query_xds_xcm_rec(struct xscore_port *port)
+{
+       struct xscore_dev *xs_dev = port->xs_dev;
+       struct ib_ah_attr ah_attr;
+       struct ib_mad_recv_wc *mad_recv_wc;
+       struct ib_xds_mad *xds_mad;
+       struct xds_request *request;
+       struct ib_port_attr port_attr;
+       int ret;
+
+       XDS_FUNCTION("%s: port 0x%llx\n", __func__, port->guid);
+
+       port->send_buf = ib_create_send_mad(port->mad_agent, 1, 0, 0,
+                                           IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+                                           GFP_KERNEL);
+       port->rec_poller_state = XDS_RECP_CREATEMAD_DONE;
+       if (IS_ERR(port->send_buf)) {
+               ret = PTR_ERR(port->send_buf);
+               IB_ERROR("ib_create_send_mad failed, error %d, GUID: 0x%llx\n",
+                        ret, port->guid);
+               return ret;
+       }
+       /*
+        * Create XDS MAD query packet
+        */
+       xds_mad = port->send_buf->mad;
+       memset(xds_mad, 0, sizeof(*xds_mad));
+       request = (struct xds_request *)xds_mad->data;
+       create_ib_mad_header(port, xds_mad);
+       create_xds_mad_req(port, request);
+
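+	/* Address the query to the XDS LID learned from the SA, at the SM's SL */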
+       memset(&ah_attr, 0, sizeof(ah_attr));
+       ah_attr.dlid = port->xds_lid;
+       (void)ib_query_port(xs_dev->device, port->port_num, &port_attr);
+       ah_attr.sl = port_attr.sm_sl;
+       ah_attr.port_num = port->port_num;
+
+       port->send_buf->ah = ib_create_ah(port->mad_agent->qp->pd, &ah_attr);
+       if (IS_ERR(port->send_buf->ah)) {
+               ib_free_send_mad(port->send_buf);
+               ret = PTR_ERR(port->send_buf->ah);
+               IB_ERROR("ib_create_ah failed, error %d, GUID: 0x%llx\n",
+                        ret, port->guid);
+               return ret;
+       }
+       port->rec_poller_state = XDS_RECP_CREATEAH_DONE;
+
+       port->send_buf->retries = 2;
+       port->send_buf->timeout_ms = XSCORE_SA_QUERY_TIMEOUT;
+
+       init_completion(&port->xds_query_done);
+
+       ret = ib_post_send_mad(port->send_buf, NULL);
+       if (ret) {
+               IB_ERROR("ib_post_send_mad failed, error %d, GUID: 0x%llx\n",
+                        ret, port->guid);
+               ib_destroy_ah(port->send_buf->ah);
+               ib_free_send_mad(port->send_buf);
+               port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]++;
+		port->send_buf = NULL;
+               return ret;
+       }
+       port->rec_poller_state = XDS_RECP_SENDMAD_DONE;
+       if (!wait_for_completion_timeout(&port->xds_query_done,
+                                        msecs_to_jiffies
+                                        (XSCORE_SA_QUERY_TIMEOUT * 10))) {
+		XDS_ERROR("%s: completion timeout, port: %d, GUID: 0x%llx\n",
+                         __func__, port->port_num, port->guid);
+               port->counters[PORT_XDS_XDS_QUERY_TOUT_COUNTER]++;
+               return -ETIMEDOUT;
+       }
+       mad_recv_wc = port->mad_recv_wc;
+       if (!mad_recv_wc || mad_recv_wc->wc->status != IB_WC_SUCCESS) {
+               if (mad_recv_wc)
+                       ret = mad_recv_wc->wc->status;
+               else
+                       ret = -EINVAL;
+	} else {
+		xds_check_xcm_record(port,
+				     (struct ib_xds_mad *)
+				     mad_recv_wc->recv_buf.mad);
+	}
+	ib_free_recv_mad(port->mad_recv_wc);
+	port->rec_poller_state = XDS_RECP_FREEMAD_DONE;
+	port->mad_recv_wc = NULL;
+       return ret;
+}
+
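+/*
+ * Ethernet (IBoE) ports have no SA/MAD path, so solicit the XCM list
+ * with an XDDS discovery message over the UD channel instead.
+ */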
+static int xs_send_xds_disc_msg(struct xscore_port *port)
+{
+       int ret;
+       struct xdds_disc_req xd_msg;
+
+       port->counters[PORT_XDS_XDS_QUERY_COUNTER]++;
+
+       memset(&xd_msg, 0, sizeof(struct xdds_disc_req));
+       xd_msg.xhdr.type = htons(XDP_MSG_TYPE_DISC_SOL);
+       xd_msg.xhdr.flags = htons(XDP_FLAGS_REQ);
+       xd_msg.xhdr.len = htons(sizeof(struct xdds_disc_req));
+
+       create_xds_mad_req(port, &xd_msg.req);
+       ret = xs_ud_send_msg(port, 0, &xd_msg, sizeof(xd_msg), XS_UD_COPY_MSG);
+       if (ret) {
+               XDDS_ERROR("xs_ud_send_msg: port GUID %llx failed, error %d\n",
+                          port->guid, ret);
+               port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]++;
+       }
+       return ret;
+}
+
+static void xcm_rec_poller(struct work_struct *work)
+{
+       struct xscore_port *port = container_of(work, struct xscore_port,
+                                               poll_work.work);
+       unsigned long flags;
+       struct ib_port_attr port_attr;
+       int ret = 0;
+
+       xscore_set_wq_state(XSCORE_DWQ_POLL_WORK);
+       port->rec_poller_state = XDS_RECP_START;
+       xsmp_cleanup_stale_xsmp_sessions(port, 0);
+
+       (void)ib_query_port(port->xs_dev->device, port->port_num, &port_attr);
+       port->rec_poller_state = XDS_RECP_QUERY_IB_DONE;
+
+       if (port_attr.state != IB_PORT_ACTIVE) {
+               XDS_INFO("%s: Port %d, GUID: 0x%llx, Not Active\n",
+                        __func__, port->port_num, port->guid);
+               port->counters[PORT_XDS_PORT_NOT_ACTIVE_COUNTER]++;
+       } else {
+               if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+                       ret = xscore_query_svc_record(port);
+                       if (!ret)
+                               ret = xscore_query_xds_xcm_rec(port);
+               } else
+                       (void)xs_send_xds_disc_msg(port);
+       }
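+	/* Re-poll sooner (10s) after a failure than after a success (20s) */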
+       if (ret)
+               port->poll_interval = msecs_to_jiffies(1000 * 10);
+       else
+               port->poll_interval = msecs_to_jiffies(1000 * 20);
+       spin_lock_irqsave(&port->lock, flags);
+       if (!test_bit(XSCORE_PORT_SHUTDOWN, &port->flags))
+               queue_delayed_work(port->port_wq,
+                                  &port->poll_work, port->poll_interval);
+       spin_unlock_irqrestore(&port->lock, flags);
+
+       port->rec_poller_state = XDS_RECP_DONE;
+       port->rec_poller_time = jiffies;
+	xscore_clear_wq_state(XSCORE_DWQ_POLL_WORK);
+}
+
+static void xscore_destroy_port(struct xscore_port *port)
+{
+       IB_FUNCTION("%s: port %d\n", __func__, port->port_num);
+       if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               ib_unregister_mad_agent(port->mad_agent);
+		port->mad_agent = NULL;
+       } else
+               xs_ud_destroy(port);
+}
+
+/*
+ * Convert a port GUID to a MAC address by stripping out bytes 3 and 4
+ * (conventionally 0xFF 0xFE in a MAC-derived EUI-64 GUID) and flipping
+ * the universal/local bit (bit 41), e.g.
+ * GUID 00:1b:21:ff:fe:1a:2b:3c -> MAC 02:1b:21:1a:2b:3c
+ */
+static void convert_guid_to_mac(u64 guid, u64 *mac)
+{
+       u64 t1;
+
+       t1 = guid & 0x0000000000FFFFFFLL;
+       guid >>= 16;
+       t1 |= (guid & 0x0000FFFFFF000000LL);
+       *mac = t1;
+       *mac ^= (1ULL << 41);
+}
+
+/*
+ * Initialize Query based on port information
+ */
+static int xscore_init_port(struct xscore_port *port)
+{
+       struct xscore_dev *xs_dev = port->xs_dev;
+       struct ib_port_attr port_attr;
+       int ret;
+
+       IB_FUNCTION("%s\n", __func__);
+
+       ret = ib_query_gid(xs_dev->device, port->port_num, 0, &port->sgid);
+       if (ret) {
+               IB_ERROR("xscore_init_port: ib_query_gid GUID 0x%llx %d\n",
+                        port->guid, ret);
+               return ret;
+       }
+       /*
+        * Get port attributes and check the type of the port
+        */
+       ret = ib_query_port(xs_dev->device, port->port_num, &port_attr);
+       if (ret) {
+               IB_ERROR("xscore_init_port: ib_query_port GUID: 0x%llx, %d\n",
+                        port->guid, ret);
+               return ret;
+       }
+       port->link_layer = rdma_port_link_layer(xs_dev->device, port->port_num);
+       port->guid = be64_to_cpu(port->sgid.global.interface_id);
+       port->lid = port_attr.lid;
+       port->sm_lid = port_attr.sm_lid;
+
+	XDS_PRINT("Port Number: %d, GUID: 0x%llx, LID: 0x%x, SM LID: 0x%x, Mode: %s\n",
+		  port->port_num, port->guid, port->lid, port->sm_lid,
+		  port->link_layer == IB_LINK_LAYER_INFINIBAND ? "IB" : "ETH");
+
+       if (port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               ret = xscore_init_mad_agent(port);
+       } else {
+               u64 mac;
+
+               /*
+                * Convert to MAC only if valid GUID.
+                * In case of link down, GUID is zero
+                */
+               if (port->guid) {
+                       convert_guid_to_mac(port->guid, &mac);
+                       port->guid = mac;
+               }
+               ret = xs_ud_create(port, xs_ud_callback, port);
+       }
+       return ret;
+}
+
+static void xscore_remove_port(struct xscore_port *port)
+{
+       /*
+        * Set a state bit to tell others we are going down
+        */
+       IB_FUNCTION("%s: port %d\n", __func__, port->port_num);
+
+       flush_workqueue(port->port_wq);
+       destroy_workqueue(port->port_wq);
+	port->port_wq = NULL;
+       xscore_destroy_port(port);
+       list_del(&port->port_list);
+       mutex_lock(&xscore_port_mutex);
+       list_del(&port->gport_list);
+       mutex_unlock(&xscore_port_mutex);
+       xcpm_port_remove_proc_entry(port);
+       kfree(port);
+}
+
+/*
+ * Initialize a port context
+ */
+static struct xscore_port *xscore_add_port(struct xscore_dev *device,
+                                          u8 port_num)
+{
+       struct xscore_port *port;
+       char name[32];
+       int ret;
+
+       IB_FUNCTION("%s: port %d\n", __func__, port_num);
+
+       port = kzalloc(sizeof(*port), GFP_KERNEL);
+       if (!port)
+               return NULL;
+       port->xs_dev = device;
+       port->port_num = port_num;
+
+       INIT_LIST_HEAD(&port->xsmp_list);
+       spin_lock_init(&port->lock);
+
+       INIT_WORK(&port->ework, xscore_port_event_handler);
+       INIT_DELAYED_WORK(&port->poll_work, xcm_rec_poller);
+       sprintf(name, "xs_wq:%d", port_num);
+       port->port_wq = create_singlethread_workqueue(name);
+       if (!port->port_wq)
+               goto err_ret;
+
+       ret = xscore_init_port(port);
+       if (ret) {
+               XDS_ERROR("xscore_init_port failed %d\n", ret);
+               goto err_ret1;
+       }
+
+       if (port->guid)
+               xcpm_port_add_proc_entry(port);
+	/*
+	 * Now start XCM record polling; poll_interval is still zero here,
+	 * so the first poll runs immediately.
+	 */
+       queue_delayed_work(port->port_wq,
+                          &port->poll_work, port->poll_interval);
+
+       return port;
+err_ret1:
+       destroy_workqueue(port->port_wq);
+err_ret:
+       kfree(port);
+       return NULL;
+}
+
+static void xscore_port_event_handler(struct work_struct *work)
+{
+       struct xscore_port *port =
+           container_of(work, struct xscore_port, ework);
+       struct ib_port_attr port_attr;
+       int port_up;
+
+       xscore_set_wq_state(XSCORE_WQ_PORT_EVENTH);
+       if (port->link_layer == IB_LINK_LAYER_ETHERNET &&
+           test_bit(XSCORE_PORT_LID_CHANGE, &port->flags)) {
+               u64 mac;
+
+               clear_bit(XSCORE_PORT_LID_CHANGE, &port->flags);
+               ib_query_gid(port->xs_dev->device, port->port_num, 0,
+                            &port->sgid);
+               port->guid = be64_to_cpu(port->sgid.global.interface_id);
+               convert_guid_to_mac(port->guid, &mac);
+               port->guid = mac;
+               xcpm_port_add_proc_entry(port);
+       }
+
+       (void)ib_query_port(port->xs_dev->device, port->port_num, &port_attr);
+
+       /*
+        * In the case of SM lid change update with new one
+        */
+       if (xscore_notify_ulps
+           && (test_and_clear_bit(XSCORE_PORT_SMLID_CHANGE, &port->flags))) {
+		pr_info("%s port%d SM Update [New %x old %x]\n", __func__,
+			port->port_num, port_attr.sm_lid, port->sm_lid);
+               port->sm_lid = port_attr.sm_lid;
+       }
+
+	/*
+	 * Treat the port as up if we either saw an ACTIVE event or the
+	 * queried port state is already ACTIVE; the event and the query
+	 * can race.
+	 */
+       if (port->pevent == IB_EVENT_PORT_ACTIVE
+           || port_attr.state == IB_PORT_ACTIVE) {
+               pr_info("xscore: Port: %llx UP\n", port->guid);
+               port_up = 1;
+               port->lid = port_attr.lid;
+               port->sm_lid = port_attr.sm_lid;
+       } else {
+               port_up = 0;
+               pr_info("xscore: Port: %llx DOWN\n", port->guid);
+       }
+       xsmp_ulp_notify(port, port_up);
+       xscore_clear_wq_state(XSCORE_WQ_PORT_EVENTH);
+}
+
+/*
+ * IB stack event handler callback
+ */
+static void xscore_event_handler(struct ib_event_handler *handler,
+                                struct ib_event *event)
+{
+       struct xscore_dev *xs_dev =
+           ib_get_client_data(event->device, &xscore_client);
+       struct xscore_port *port;
+       int port_num = event->element.port_num;
+
+       if (!xs_dev || xs_dev->device != event->device)
+               return;
+
+       list_for_each_entry(port, &xs_dev->port_list, port_list) {
+               if (port->port_num == port_num)
+                       goto found;
+       }
+       return;
+
+found:
+       port->pevent = event->event;
+
+       switch (event->event) {
+       case IB_EVENT_PORT_ERR:
+       case IB_EVENT_PORT_ACTIVE:
+               queue_work(port->port_wq, &port->ework);
+               break;
+       case IB_EVENT_LID_CHANGE:
+               /*
+                * Used by IBOE
+                */
+               set_bit(XSCORE_PORT_LID_CHANGE, &port->flags);
+               queue_work(port->port_wq, &port->ework);
+               break;
+       case IB_EVENT_PKEY_CHANGE:
+               break;
+       case IB_EVENT_SM_CHANGE:
+               if (xscore_notify_ulps) {
+                       set_bit(XSCORE_PORT_SMLID_CHANGE, &port->flags);
+                       queue_work(port->port_wq, &port->ework);
+               }
+               break;
+       default:
+               break;
+       }
+}
+
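+/* Minimum mlx4 firmware (2.7.0), packed as major << 32 | minor << 16 | subminor */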
+static const u64 min_fw_version = (2ULL << 32) | (7ULL << 16) | (0ULL << 0);
+
+static int xscore_is_mlx4_fw_down_rev(u64 fw_ver)
+{
+	return fw_ver < min_fw_version;
+}
+
+/*
+ * This callback gets called for every HCA in the system.
+ * For the most part it executes in the context of the register call.
+ */
+static void xscore_add_one(struct ib_device *device)
+{
+       struct xscore_dev *xs_dev;
+       struct ib_device_attr dev_attr;
+       int p;
+       struct xscore_port *port;
+
+       IB_FUNCTION("%s: device: %s\n", __func__, device->name);
+
+       if (ib_query_device(device, &dev_attr)) {
+               IB_ERROR("Query device failed for %s\n", device->name);
+               return;
+       }
+
+       /* See if this is some form of a Mellanox ConnectX card */
+       if (strncmp(device->name, "mlx4", sizeof("mlx4") - 1) == 0) {
+               if (xscore_is_mlx4_fw_down_rev(dev_attr.fw_ver)) {
+			pr_info("xscore: firmware %d.%d.%d on device \"%s\" is below the minimum %d.%d.%d needed to support the Ethernet transport\n",
+				(int)((dev_attr.fw_ver >> 32) & 0xffff),
+				(int)((dev_attr.fw_ver >> 16) & 0xffff),
+				(int)(dev_attr.fw_ver & 0xffff),
+				device->name,
+				(int)((min_fw_version >> 32) & 0xffff),
+				(int)((min_fw_version >> 16) & 0xffff),
+				(int)(min_fw_version & 0xffff));
+               }
+       }
+
+       xs_dev = kzalloc(sizeof(*xs_dev), GFP_KERNEL);
+       if (!xs_dev)
+               return;
+
+       INIT_LIST_HEAD(&xs_dev->port_list);
+       if (strstr(device->name, "xgc"))
+               xs_dev->is_shca = 1;
+       xs_dev->device = device;
+       xs_dev->dev_attr = dev_attr;
+       xs_dev->fw_ver = dev_attr.fw_ver;
+       xs_dev->hw_ver = dev_attr.hw_ver;
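+	/* Set up a PD and a DMA MR with local and remote access for this HCA */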
+       xs_dev->pd = ib_alloc_pd(device);
+       if (IS_ERR(xs_dev->pd))
+               goto free_dev;
+
+       xs_dev->mr = ib_get_dma_mr(xs_dev->pd,
+                                  IB_ACCESS_LOCAL_WRITE |
+                                  IB_ACCESS_REMOTE_READ |
+                                  IB_ACCESS_REMOTE_WRITE);
+       if (IS_ERR(xs_dev->mr))
+               goto err_pd;
+
+       for (p = 1; p <= device->phys_port_cnt; ++p) {
+               port = xscore_add_port(xs_dev, p);
+               if (port) {
+                       list_add_tail(&port->port_list, &xs_dev->port_list);
+                       mutex_lock(&xscore_port_mutex);
+                       list_add_tail(&port->gport_list, &xscore_port_list);
+                       mutex_unlock(&xscore_port_mutex);
+               }
+       }
+
+       ib_set_client_data(device, &xscore_client, xs_dev);
+
+       INIT_IB_EVENT_HANDLER(&xs_dev->event_handler, xs_dev->device,
+                             xscore_event_handler);
+       (void)ib_register_event_handler(&xs_dev->event_handler);
+
+       return;
+
+err_pd:
+       ib_dealloc_pd(xs_dev->pd);
+free_dev:
+       kfree(xs_dev);
+}
+
+/*
+ * Remove an HCA from the system; happens during driver unload when we
+ * unregister from the IB stack
+ */
+static void xscore_remove_one(struct ib_device *device)
+{
+       struct xscore_dev *xs_dev;
+       struct xscore_port *port;
+       struct xscore_port *tmp_port;
+       unsigned long flags;
+
+       IB_FUNCTION("%s: device: %s\n", __func__, device->name);
+
+       xs_dev = ib_get_client_data(device, &xscore_client);
+       ib_unregister_event_handler(&xs_dev->event_handler);
+       /*
+        * Now go through the port list and shut down everything you can
+        */
+       list_for_each_entry_safe(port, tmp_port, &xs_dev->port_list,
+                       port_list) {
+               spin_lock_irqsave(&port->lock, flags);
+               set_bit(XSCORE_PORT_SHUTDOWN, &port->flags);
+               spin_unlock_irqrestore(&port->lock, flags);
+               cancel_delayed_work(&port->poll_work);
+               flush_workqueue(port->port_wq);
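+		/* Cancel once more in case the poller re-armed before seeing the shutdown bit */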
+               cancel_delayed_work(&port->poll_work);
+               xsmp_cleanup_stale_xsmp_sessions(port, 1);
+               xscore_remove_port(port);
+       }
+       ib_dereg_mr(xs_dev->mr);
+       ib_dealloc_pd(xs_dev->pd);
+       kfree(xs_dev);
+}
+
+/*
+ * Driver load entry point
+ */
+static int __init xscore_init(void)
+{
+       int ret;
+
+       if (!hostname)
+               strncpy(hostname_str, init_utsname()->nodename,
+                       XSIGO_MAX_HOSTNAME);
+       else
+               strncpy(hostname_str, hostname, XSIGO_MAX_HOSTNAME);
+       hostname_str[XSIGO_MAX_HOSTNAME] = 0;
+
+       system_id_str[0] = 0;
+       if (system_id)
+               strncpy(system_id_str, system_id, sizeof(system_id_str) - 1);
+       system_id_str[sizeof(system_id_str) - 1] = 0;
+
+       xg_vmk_kompat_init();
+
+       INIT_LIST_HEAD(&xscore_port_list);
+       mutex_init(&xscore_port_mutex);
+
+       ret = xscore_create_procfs_entries();
+       if (ret)
+               return ret;
+
+       xsmp_module_init();
+       /*
+        * Now register with SA
+        */
+       ib_sa_register_client(&xscore_sa_client);
+
+       /*
+        * Now register with IB framework
+        */
+       ret = ib_register_client(&xscore_client);
+       if (ret) {
+               IB_ERROR("couldn't register IB client\n");
+               goto err1;
+       }
+       ret = xscore_uadm_init();
+       if (ret)
+               goto err2;
+       /* Wait for Sessions to come up */
+       xscore_wait_for_sessions(1);
+       return ret;
+err2:
+       ib_unregister_client(&xscore_client);
+err1:
+       ib_sa_unregister_client(&xscore_sa_client);
+       xsmp_module_destroy();
+       xscore_remove_procfs_entries();
+       return ret;
+}
+
+/*
+ * Driver unload entry point
+ */
+static void __exit xscore_exit(void)
+{
+       xscore_uadm_destroy();
+       ib_unregister_client(&xscore_client);
+       ib_sa_unregister_client(&xscore_sa_client);
+       xsmp_module_destroy();
+       xscore_remove_procfs_entries();
+       xg_vmk_kompat_cleanup();
+}
+
+module_init(xscore_init);
+module_exit(xscore_exit);
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_priv.h b/drivers/infiniband/ulp/xsigo/xscore/xscore_priv.h
new file mode 100644 (file)
index 0000000..e3a6a90
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSCORE_PRIV_H_
+#define _XSCORE_PRIV_H_
+
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+
+#include <linux/version.h>
+
+#include <rdma/ib_addr.h>
+#include <rdma/ib_fmr_pool.h>
+#include <asm/byteorder.h>
+
+#include "xs_compat.h"
+#include "xscore_xds.h"
+#include "xsmp_common.h"
+#include "xsmp_session.h"
+
+#define MODULE_NAME "XSCORE"
+
+#define XSCORE_WQ_XDDS_HANDLER    0x1
+#define XSCORE_WQ_PORT_EVENTH     0x2
+#define XSCORE_WQ_XSMP_PROC_MSG   0x3
+#define XSCORE_DWQ_POLL_WORK     0x4
+#define XSCORE_DWQ_SM_WORK        0x5
+
+extern int xscore_debug;
+extern unsigned long xscore_wait_time;
+extern int xscore_force_sm_change;
+extern struct mutex xscore_port_mutex;
+extern unsigned long xscore_wq_state;
+extern unsigned long xscore_wq_jiffies;
+extern unsigned long xscore_last_wq;
+
+enum {
+       DEBUG_IB_INFO = 0x00000001,
+       DEBUG_IB_FUNCTION = 0x00000002,
+       DEBUG_XDS_INFO = 0x00000004,
+       DEBUG_XDS_FUNCTION = 0x00000008,
+       DEBUG_XSMP_INFO = 0x00000010,
+       DEBUG_XSMP_FUNCTION = 0x00000020,
+       DEBUG_UADM_INFO = 0x00000040,
+       DEBUG_UADM_FUNCTION = 0x00000080,
+       DEBUG_XDDS_INFO = 0x00000100,
+       DEBUG_XDDS_FUNCTION = 0x00000200,
+};
+
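+/*
+ * xscore_debug is a bitmask of the DEBUG_* values above; the *_INFO and
+ * *_FUNCTION macros below only print when the matching bit is set
+ * (runtime-tunable via the xscore_debug module parameter).
+ */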
+#define PRINT(level, x, fmt, arg...)                                    \
+	printk(level "%s: %s: " fmt, MODULE_NAME, x, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...)             \
+       do {                                                            \
+               if (condition)                                         \
+                       printk(level "%s: %s: " fmt,                    \
+                       MODULE_NAME, x, ##arg);                  \
+       } while (0)
+
+#define IB_PRINT(fmt, arg...)                   \
+       PRINT(KERN_INFO, "IB", fmt, ##arg)
+#define IB_ERROR(fmt, arg...)                   \
+       PRINT(KERN_ERR, "IB", fmt, ##arg)
+
+#define IB_FUNCTION(fmt, arg...)                                \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "IB",                                 \
+                       (xscore_debug & DEBUG_IB_FUNCTION),     \
+                       fmt, ##arg)
+
+#define IB_INFO(fmt, arg...)                                    \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "IB",                                 \
+                       (xscore_debug & DEBUG_IB_INFO),         \
+                       fmt, ##arg)
+
+#define XDS_PRINT(fmt, arg...)                   \
+       PRINT(KERN_INFO, "XDS", fmt, ##arg)
+#define XDS_ERROR(fmt, arg...)                   \
+       PRINT(KERN_ERR, "XDS", fmt, ##arg)
+
+#define XDS_FUNCTION(fmt, arg...)                                \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XDS",                                 \
+                       (xscore_debug & DEBUG_XDS_FUNCTION),     \
+                       fmt, ##arg)
+
+#define XDS_INFO(fmt, arg...)                                    \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XDS",                                 \
+                       (xscore_debug & DEBUG_XDS_INFO),         \
+                       fmt, ##arg)
+
+#define XSMP_PRINT(fmt, arg...)                   \
+       PRINT(KERN_INFO, "XSMP", fmt, ##arg)
+#define XSMP_ERROR(fmt, arg...)                   \
+       PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+
+#define XSMP_FUNCTION(fmt, arg...)                                \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+               "XSMP",                                 \
+               (xscore_debug & DEBUG_XSMP_FUNCTION),     \
+               fmt, ##arg)
+
+#define XSMP_INFO(fmt, arg...)                                    \
+               PRINT_CONDITIONAL(KERN_INFO,                            \
+               "XSMP",                                 \
+               (xscore_debug & DEBUG_XSMP_INFO),         \
+               fmt, ##arg)
+
+#define UADM_PRINT(fmt, arg...)                   \
+       PRINT(KERN_INFO, "UADM", fmt, ##arg)
+#define UADM_ERROR(fmt, arg...)                   \
+       PRINT(KERN_ERR, "UADM", fmt, ##arg)
+
+#define UADM_FUNCTION(fmt, arg...)                                \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "UADM",                                 \
+                       (xscore_debug & DEBUG_UADM_FUNCTION),     \
+                       fmt, ##arg)
+
+#define UADM_INFO(fmt, arg...)                                    \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "UADM",                                 \
+                       (xscore_debug & DEBUG_UADM_INFO),         \
+                       fmt, ##arg)
+
+#define XDDS_PRINT(fmt, arg...)                   \
+       PRINT(KERN_INFO, "XDDS", fmt, ##arg)
+#define XDDS_ERROR(fmt, arg...)                   \
+       PRINT(KERN_ERR, "XDDS", fmt, ##arg)
+
+#define XDDS_FUNCTION(fmt, arg...)                                \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XDDS",                                 \
+                       (xscore_debug & DEBUG_XDDS_FUNCTION),     \
+                       fmt, ##arg)
+
+#define XDDS_INFO(fmt, arg...)                                    \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XDDS",                                 \
+                       (xscore_debug & DEBUG_XDDS_INFO),         \
+                       fmt, ##arg)
+
+/*
+ * This structure represents context for the HCA
+ */
+struct xscore_dev {
+       struct list_head port_list;
+       struct ib_device *device;
+       struct ib_pd *pd;
+       struct ib_mr *mr;
+       struct ib_event_handler event_handler;
+       /* We can remove fw_ver and hw_ver */
+       struct ib_device_attr dev_attr;
+       u64 fw_ver;
+       u32 hw_ver;
+       u32 vendor_part_id;
+       u8 is_shca;
+};
+
+enum {
+       PORT_XDS_PORT_NOT_ACTIVE_COUNTER,
+       PORT_XDS_SA_QUERY_ERROR_COUNTER,
+       PORT_XDS_SA_QUERY_TOUT_COUNTER,
+       PORT_XDS_SA_QUERY_COUNTER,
+       PORT_XDS_XDS_QUERY_ERROR_COUNTER,
+       PORT_XDS_XDS_QUERY_TOUT_COUNTER,
+       PORT_XDS_XDS_QUERY_COUNTER,
+       PORT_XDS_LIST_COUNT_ZERO_COUNTER,
+       PORT_XDS_LIST_COUNT_COUNTER,
+       PORT_MAX_COUNTERS
+};
+enum {
+       XDS_RECP_START = 1,
+       XDS_RECP_QUERY_IB_DONE,
+       XDS_RECP_SAUPDATE_DONE,
+       XDS_RECP_SAREC_DONE,
+       XDS_RECP_CREATEMAD_DONE,
+       XDS_RECP_CREATEAH_DONE,
+       XDS_RECP_SENDMAD_DONE,
+       XDS_RECP_FREEMAD_DONE,
+       XDS_RECP_DONE
+};
+
+/*
+ * This represents the context for each port
+ */
+/* TBD: add a state field here: (a) PORT_ACTIVE, (b) XDS RECORD / NO XDS RECORD */
+struct xscore_port {
+       spinlock_t lock;
+       struct xscore_dev *xs_dev;      /* Back pointer to HCA context */
+       struct list_head port_list;
+       struct list_head gport_list;
+       unsigned long flags;
+#define        XSCORE_PORT_SHUTDOWN            1
+#define        XSCORE_PORT_LID_CHANGE          2
+#define        XSCORE_PORT_PROCFS_CREATED      3
+#define        XSCORE_SP_PRESENT               4
+#define        XSCORE_SP_NOT_PRESENT           5
+#define XSCORE_FORCE_SM_CHANGE         6
+#define XSCORE_PORT_SMLID_CHANGE       7
+       u8 port_num;
+       struct workqueue_struct *port_wq;
+       struct delayed_work poll_work;
+       enum ib_event_type pevent;
+       struct work_struct ework;
+       int poll_interval;
+       int rec_poller_state;
+       unsigned long rec_poller_time;
+       struct ib_mad_agent *mad_agent;
+       struct ib_mad_send_buf *send_buf;
+       struct completion sa_query_done;
+       int sa_query_status;
+       struct completion xds_query_done;
+       struct xcm_list xcm_list;
+       struct ib_mad_recv_wc *mad_recv_wc;
+       u64 guid;
+       union ib_gid sgid;
+       u16 lid;
+       u16 sm_lid;
+       u16 xds_lid;
+       u64 xds_guid;
+       enum rdma_link_layer link_layer;
+       struct ib_ud_ctx *ib_ud_ctx;
+       struct list_head xsmp_list;
+       u32 counters[PORT_MAX_COUNTERS];
+};
+
+#define XS_UD_COPY_MSG          0x1
+
+static inline void xscore_set_wq_state(unsigned long state)
+{
+}
+
+static inline void xscore_clear_wq_state(unsigned long state)
+{
+}
+
+extern int xs_vpci_bus_init(void);
+extern void xs_vpci_bus_remove(void);
+
+extern int xs_ud_create(struct xscore_port *pinfop,
+                       void (*callback)(void *, void *, int), void *arg);
+extern void xs_ud_destroy(struct xscore_port *pinfop);
+
+extern int xs_ud_send_msg(struct xscore_port *pinfop, uint8_t *macp,
+                         void *msgp, int len, int flags);
+extern void xs_ud_free(void *msg);
+
+void xsmp_module_init(void);
+void xsmp_module_destroy(void);
+void xsmp_allocate_xsmp_session(struct xscore_port *port, u64 guid, u16 lid);
+void xsmp_cleanup_stale_xsmp_sessions(struct xscore_port *port, int force);
+/* Externs*/
+extern struct ib_sa_client xscore_sa_client;
+
+#endif /* _XSCORE_PRIV_H_ */
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_stats.c b/drivers/infiniband/ulp/xsigo/xscore/xscore_stats.c
new file mode 100644 (file)
index 0000000..bb07056
--- /dev/null
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/utsname.h>
+
+#include "xscore_priv.h"
+#include "xs_compat.h"
+#include "xscore.h"
+#include "xsmp.h"
+
+#define        PFX     "STATS"
+
+unsigned long xscore_wq_state;
+unsigned long xscore_wq_jiffies;
+unsigned long xscore_last_wq;
+
+struct proc_dir_entry *proc_root_xscore;
+struct proc_dir_entry *proc_root_xcpm;
+struct proc_dir_entry *proc_root_xcpm_info;
+struct proc_dir_entry *proc_root_xcpm_links;
+struct proc_dir_entry *proc_root_xcpm_ports;
+
+static char *ib_port_phys_state_str[] = {
+       "0: Link Down",
+       "1: Sleep",
+       "2: Polling",
+       "3: Disabled",
+       "4: Port Configuration Training",
+       "5: Link Up",
+       "6: Link Error Recovery",
+       "7: Phy Test",
+};
+
+static char *port_state2str[] = {
+       "PORT_NOP",
+       "PORT_DOWN",
+       "PORT_INIT",
+       "PORT_ARMED",
+       "PORT_ACTIVE",
+       "PORT_ACTIVE_DEFER",
+};
+
+static char *port_linkLayer2str[] = {
+       "Unspecified",
+       "Infiniband",
+       "Ethernet",
+};
+
+static int xcpm_port_proc_open_device(struct inode *inode, struct file *file);
+static int xcpm_port_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xcpm_port_proc_write_device(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *offp);
+static int xcpm_xsmp_proc_open_device(struct inode *inode, struct file *file);
+static int xcpm_xsmp_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xcpm_xsmp_proc_write_device(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *offp);
+static int xscore_proc_open_debug(struct inode *inode, struct file *file);
+static int xscore_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_debug(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp);
+static int xscore_proc_open_info(struct inode *inode, struct file *file);
+static int xscore_proc_read_info(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_info(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *offp);
+static int xscore_proc_open_systemid(struct inode *inode, struct file *file);
+static int xscore_proc_read_systemid(struct seq_file *m, void *data);
+static ssize_t xscore_proc_write_systemid(struct file *file,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *offp);
+static const struct file_operations xcpm_port_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xcpm_port_proc_open_device,
+       .read = seq_read,
+       .write = xcpm_port_proc_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xcpm_xsmp_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xcpm_xsmp_proc_open_device,
+       .read = seq_read,
+       .write = xcpm_xsmp_proc_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xscore_debug_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xscore_proc_open_debug,
+       .read = seq_read,
+       .write = xscore_proc_write_debug,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xscore_info_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xscore_proc_open_info,
+       .read = seq_read,
+       .write = xscore_proc_write_info,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xscore_systemid_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xscore_proc_open_systemid,
+       .read = seq_read,
+       .write = xscore_proc_write_systemid,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
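+/* Break down the time elapsed since @ojiffies into hours/minutes/seconds */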
+static void calc_time_fjiffies(unsigned long ojiffies, unsigned long *tsecs,
+                              unsigned long *tmins, unsigned long *thrs)
+{
+       unsigned long tmp_tsecs = 0;
+       *tsecs = *tmins = *thrs = 0;
+
+       tmp_tsecs = jiffies_to_msecs(jiffies - ojiffies) / 1000;
+       *thrs = tmp_tsecs / (60 * 60);
+       *tmins = (tmp_tsecs / 60 - ((*thrs) * 60));
+       *tsecs = tmp_tsecs - ((*tmins) * 60) - ((*thrs) * 60 * 60);
+}
+
+static ssize_t xcpm_port_proc_write_device(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *offp)
+{
+       struct xscore_port *ib_port = NULL;
+
+       file->private_data = PDE_DATA(file_inode(file));
+       ib_port = (struct xscore_port *)file->private_data;
+
+       memset(ib_port->counters, 0, sizeof(ib_port->counters));
+       return count;
+}
+
+static int xcpm_port_proc_read_device(struct seq_file *m, void *data)
+{
+       struct xscore_port *ib_port = NULL;
+       struct ib_port_attr port_attr;
+       u64 fw_ver;
+       unsigned long tsecs = 0, tmins = 0, thrs = 0;
+
+       ib_port = (struct xscore_port *)m->private;
+
+       (void)ib_query_port(ib_port->xs_dev->device, ib_port->port_num,
+                           &port_attr);
+
+       seq_printf(m, "Device name: \t\t%s\n", ib_port->xs_dev->device->name);
+       fw_ver = ib_port->xs_dev->fw_ver;
+       seq_printf(m, "Device FW Version: \t%d.%d.%d\n", (int)(fw_ver >> 32),
+                  (int)((fw_ver >> 16) & 0xFFFF), (int)(fw_ver & 0xFFFF));
+       seq_printf(m, "Port: \t\t\t%d\n", ib_port->port_num);
+       seq_printf(m, "Port %s: \t\t0x%llx\n",
+                  ib_port->link_layer == IB_LINK_LAYER_ETHERNET ?
+                  "MAC" : "GUID", ib_port->guid);
+       seq_printf(m, "Port PhysState: \t%s\n",
+                  ib_port_phys_state_str[port_attr.phys_state]);
+       seq_printf(m, "Port State: \t\t%s\n", port_state2str[port_attr.state]);
+       if (ib_port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               seq_printf(m, "Port LID: \t\t%d\n", port_attr.lid);
+               seq_printf(m, "Port SM LID: \t\t%d\n", port_attr.sm_lid);
+       } else {
+               if (ib_port->xs_dev->is_shca == 0 && port_attr.active_mtu == 4)
+                       port_attr.active_mtu = 5;
+       }
+       calc_time_fjiffies(ib_port->rec_poller_time, &tsecs, &tmins, &thrs);
+       seq_printf(m, "Last XCM poll :\t\t%lu hrs %lu mins %lu seconds\n",
+                  thrs, tmins, tsecs);
+       seq_printf(m, "Port XCM poll state: \t%d\n", ib_port->rec_poller_state);
+
+       /*
+        * IB8KTBD this reports wrong mtu for 8k IB Mtu defined for softhca
+        */
+       seq_printf(m, "Port MTU: \t\t%d (%d)\n", port_attr.active_mtu,
+                  xg_ib_mtu_enum_to_int(port_attr.active_mtu));
+
+       seq_printf(m, "Port Link Layer: \t%s\n",
+                  port_linkLayer2str[ib_port->link_layer]);
+       seq_puts(m, "\n");
+       if (ib_port->link_layer == IB_LINK_LAYER_INFINIBAND) {
+               seq_printf(m, "Port XDS LID: \t\t%d\n", ib_port->xds_lid);
+               seq_printf(m, "Port XDS GUID: \t\t0x%llx\n", ib_port->xds_guid);
+       }
+       seq_puts(m, "\n");
+
+       seq_printf(m, "Port Not Active Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_PORT_NOT_ACTIVE_COUNTER]);
+       seq_printf(m, "SA Query Error Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_SA_QUERY_ERROR_COUNTER]);
+       seq_printf(m, "SA Query Timeout Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_SA_QUERY_TOUT_COUNTER]);
+       seq_printf(m, "SA Query Counter: \t\t%d\n",
+                  ib_port->counters[PORT_XDS_SA_QUERY_COUNTER]);
+       seq_printf(m, "XDS Query Counter: \t\t%d\n",
+                  ib_port->counters[PORT_XDS_XDS_QUERY_COUNTER]);
+       seq_printf(m, "XDS Query Error Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_XDS_QUERY_ERROR_COUNTER]);
+       seq_printf(m, "XDS List Count Zero Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_LIST_COUNT_ZERO_COUNTER]);
+       seq_printf(m, "XDS Query Timeout Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_XDS_QUERY_TOUT_COUNTER]);
+       seq_printf(m, "XDS List Count Counter: \t%d\n",
+                  ib_port->counters[PORT_XDS_LIST_COUNT_COUNTER]);
+
+       return 0;
+}
+
+static int xcpm_port_proc_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, xcpm_port_proc_read_device, PDE_DATA(inode));
+}
+
+void xcpm_port_add_proc_entry(struct xscore_port *port)
+{
+       struct proc_dir_entry *file;
+       char name[32];
+
+       if (test_and_set_bit(XSCORE_PORT_PROCFS_CREATED, &port->flags))
+               return;
+
+       sprintf(name, "%llx", port->guid);
+
+       file = proc_create_data(name, S_IFREG, proc_root_xcpm_ports,
+                               &xcpm_port_proc_fops, port);
+       if (!file)
+               pr_err("unable to create /proc/driver/xscore/xcpm/ports/%s.\n", name);
+}
+
+void xcpm_port_remove_proc_entry(struct xscore_port *port)
+{
+       char name[32];
+
+       sprintf(name, "%llx", port->guid);
+       remove_proc_entry(name, proc_root_xcpm_ports);
+       clear_bit(XSCORE_PORT_PROCFS_CREATED, &port->flags);
+}
+
+static ssize_t xcpm_xsmp_proc_write_device(struct file *file,
+                                          const char __user *buffer,
+                                          size_t count, loff_t *offp)
+{
+       struct xsmp_ctx *ctx = PDE_DATA(file_inode(file));
+       int action;
+       ssize_t ret = -EINVAL;
+       char    *buf;
+
+       if (!count || count >= PAGE_SIZE)
+               return -EINVAL;
+
+       buf = (char *) __get_free_page(GFP_USER);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, count))
+               goto out;
+       buf[count] = '\0';
+
+       if (kstrtoint(buf, 0, &action))
+               goto out;
+
+       switch (action) {
+       case 0:         /* Clear counters */
+               memset(ctx->counters, 0, sizeof(ctx->counters));
+               break;
+       case 4567:
+               pr_err("XSMP is shut down by user %s : %s (0x%llx)\n",
+                      ctx->session_name, ctx->chassis_name, ctx->dguid);
+               set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+               break;
+       default:
+               break;
+       }
+       ret = count;
+out:
+       free_page((unsigned long)buf);
+       return ret;
+}
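+
+/*
+ * Usage sketch (illustrative, with a hypothetical session id): writing
+ * "0" to a session file clears its counters and writing "4567" marks
+ * the session as shutting down:
+ *
+ *     echo 0    > /proc/driver/xscore/xcpm/links/3
+ *     echo 4567 > /proc/driver/xscore/xcpm/links/3
+ */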
+
+static int xcpm_xsmp_proc_read_device(struct seq_file *m, void *data)
+{
+       struct xsmp_ctx *xsmp_ctx = (struct xsmp_ctx *)m->private;
+       char *state_str = NULL;
+       unsigned long tsecs = 0, tmins = 0, thrs = 0;
+       char tmp_buf[256];
+
+       if (xsmp_ctx->state == XSMP_SESSION_CONNECTED)
+               state_str = "Up";
+       else
+               state_str = "Down";
+
+       seq_printf(m, "State:\t\t\t\t%s\n", state_str);
+       seq_printf(m, "Hello interval (secs):\t\t%d\n",
+                  xsmp_ctx->hello_timeout / (3 * HZ));
+       seq_printf(m, "Session timeout (secs):\t\t%d\n",
+                  xsmp_ctx->hello_timeout / HZ);
+       seq_printf(m, "Datapath timeout (secs):\t%d\n",
+                  xsmp_ctx->datapath_timeout);
+
+       seq_printf(m, "CA Device Name:\t\t\t%s\n",
+                  xsmp_ctx->port->xs_dev->device->name);
+       seq_printf(m, "Local port:\t\t\t%d\n", (int)xsmp_ctx->port->port_num);
+       seq_printf(m, "Local lid:\t\t\t%d\n", (int)xsmp_ctx->port->lid);
+       seq_printf(m, "Local guid:\t\t\t0x%Lx\n", xsmp_ctx->port->guid);
+       seq_printf(m, "Remote lid:\t\t\t%d\n", xsmp_ctx->dlid);
+       seq_printf(m, "Remote guid:\t\t\t0x%Lx\n", xsmp_ctx->dguid);
+
+       seq_printf(m, "Chassis's xcpm version:\t\t%x\n",
+                  xsmp_ctx->xsigo_xsmp_version);
+       seq_printf(m, "Chassis Name:\t\t\t%s\n", xsmp_ctx->chassis_name);
+       seq_printf(m, "Server-Profile Name:\t\t%s\n", xsmp_ctx->session_name);
+
+       seq_puts(m, "\n");
+       seq_printf(m, "Port Link Layer:\t\t%s\n",
+                  port_linkLayer2str[xsmp_ctx->port->link_layer]);
+       seq_puts(m, "\n");
+
+       if (xsmp_ctx->state == XSMP_SESSION_CONNECTED) {
+               int lqpn, dqpn;
+
+               lqpn = xsmp_ctx->conn_ctx.local_qpn;
+               dqpn = xsmp_ctx->conn_ctx.remote_qpn;
+
+               calc_time_fjiffies(xsmp_ctx->jiffies, &tsecs, &tmins, &thrs);
+               seq_printf(m, "QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+                          lqpn, lqpn, dqpn, dqpn);
+       }
+
+       tmp_buf[0] = 0;
+       if (test_bit(XSMP_REG_SENT, &xsmp_ctx->flags))
+               strcat(tmp_buf, "XSMP Reg Sent");
+       else
+               strcat(tmp_buf, "XSMP Reg Not Sent");
+       if (test_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags))
+               strcat(tmp_buf, " + XSMP Reg Conf Rcvd");
+       else
+               strcat(tmp_buf, " + XSMP Reg Conf Not Rcvd");
+
+       if (test_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags))
+               strcat(tmp_buf, " + IB Link Down");
+
+       if (xsmp_ctx->conn_ctx.features & XSCORE_USE_CHECKSUM)
+               strcat(tmp_buf, " + Checksum Mode");
+       else
+               strcat(tmp_buf, " + ICRC Mode");
+
+       seq_printf(m, "%s\n\n", tmp_buf);
+
+       seq_printf(m, "Session Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+                  thrs, tmins, tsecs);
+
+       calc_time_fjiffies(xsmp_ctx->hello_jiffies, &tsecs, &tmins, &thrs);
+       seq_printf(m, "Last Hello received :\t\t%lu hrs %lu mins %lu seconds\n",
+                  thrs, tmins, tsecs);
+       seq_printf(m, "Number of session timeouts:\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_TIMEOUT_COUNTER]);
+       seq_printf(m, "Reg Sent Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_REG_SENT_COUNTER]);
+       seq_printf(m, "Resource List Sent Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_RES_LIST_COUNTER]);
+       seq_printf(m, "Reg Confirm Rcvd Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_REG_CONF_COUNTER]);
+       seq_printf(m, "Rej Rcvd Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_REJ_RCVD_COUNTER]);
+       seq_printf(m, "Shutdown Rcvd Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_SHUTDOWN_RCVD_COUNTER]);
+       seq_printf(m, "XVE Type Rcvd Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_XVE_MESSAGE_COUNTER]);
+       seq_printf(m, "VNIC Type Rcvd Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_VNIC_MESSAGE_COUNTER]);
+       seq_printf(m, "VHBA Type Rcvd Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_VHBA_MESSAGE_COUNTER]);
+       seq_printf(m, "USPACE Type Rcvd Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_USPACE_MESSAGE_COUNTER]);
+       seq_printf(m, "SESSION Type Rcvd Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]);
+       seq_printf(m, "VHBA Type Sent Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_VHBA_MESSAGE_SENT_COUNTER]);
+       seq_printf(m, "VNIC Type Sent Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_VNIC_MESSAGE_SENT_COUNTER]);
+       seq_printf(m, "USPACE Type Sent Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_USPACE_MESSAGE_SENT_COUNTER]);
+       seq_printf(m, "SESSION Type Sent Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_MESSAGE_SENT_COUNTER]);
+       seq_printf(m, "Hello recv count:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_HELLO_RCVD_COUNTER]);
+       seq_printf(m, "Hello recv(INTERRUPT_MODE):\t%d\n",
+                  xsmp_ctx->counters[XSMP_HELLO_INTERRUPT_COUNTER]);
+       seq_printf(m, "Hello send count:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_HELLO_SENT_COUNTER]);
+       seq_printf(m, "Seq Number Mismatch Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_SEQ_MISMATCH_COUNTER]);
+       seq_printf(m, "Ring Full Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_RING_FULL_COUNTER]);
+       seq_printf(m, "Send Error Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_SEND_ERROR_COUNTER]);
+       seq_printf(m, "Conn Down Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_SESSION_CONN_DOWN_COUNTER]);
+       seq_printf(m, "Total XSMP msg Counter:\t\t%d\n",
+                  xsmp_ctx->counters[XSMP_TOTAL_MSG_SENT_COUNTER]);
+       seq_printf(m, "Session Conn Retry Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_CONN_RETRY_COUNTER]);
+       seq_printf(m, "Session Conn Failed Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_CONN_FAILED_COUNTER]);
+       seq_printf(m, "Session Conn Success Counter:\t%d\n",
+                  xsmp_ctx->counters[XSMP_CONN_SUCCESS_COUNTER]);
+       return 0;
+}
+
+static int xcpm_xsmp_proc_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, xcpm_xsmp_proc_read_device, PDE_DATA(inode));
+}
+
+void xcpm_xsmp_add_proc_entry(struct xsmp_ctx *xsmp_ctx)
+{
+       struct proc_dir_entry *file;
+       char name[32];
+
+       sprintf(name, "%d", xsmp_ctx->idr);
+
+       file = proc_create_data(name, S_IFREG, proc_root_xcpm_links,
+                               &xcpm_xsmp_proc_fops, xsmp_ctx);
+       if (!file)
+               pr_err("Unable to create /proc/driver/xscore/xcpm/links/%s.\n", name);
+}
+
+void xcpm_xsmp_remove_proc_entry(struct xsmp_ctx *xsmp_ctx)
+{
+       char name[32];
+
+       sprintf(name, "%d", xsmp_ctx->idr);
+       remove_proc_entry(name, proc_root_xcpm_links);
+}
+
+static ssize_t xscore_proc_write_systemid(struct file *file,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *offp)
+{
+       char    *buf;
+
+       if (!count || count >= PAGE_SIZE || count >= sizeof(system_id_str))
+               return -EINVAL;
+
+       buf = (char *) __get_free_page(GFP_USER);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, count)) {
+               free_page((unsigned long)buf);
+               return -EFAULT;
+       }
+       buf[count] = '\0';
+
+       memcpy(system_id_str, buf, count);
+       if (system_id_str[count - 1] == '\n')
+               system_id_str[count - 1] = 0;
+       else
+               system_id_str[count] = 0;
+       free_page((unsigned long)buf);
+       return count;
+}
+
+static int xscore_proc_read_systemid(struct seq_file *m, void *data)
+{
+       if (system_id_str[0])
+               seq_printf(m, "system_id:\t\t\t%s\n", system_id_str);
+       else
+               seq_puts(m, "system_id:\t\t\t<NULL>\n");
+       return 0;
+}
+
+static int xscore_proc_open_systemid(struct inode *inode, struct file *file)
+{
+       return single_open(file, xscore_proc_read_systemid, PDE_DATA(inode));
+}
+
+static ssize_t xscore_proc_write_info(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *offp)
+{
+       int cc = count >= XSIGO_MAX_HOSTNAME ? XSIGO_MAX_HOSTNAME - 1 : count;
+       char    *buf;
+
+       if (!count)
+               return -EINVAL;
+
+       buf = (char *) __get_free_page(GFP_USER);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, cc)) {
+               free_page((unsigned long)buf);
+               return -EFAULT;
+       }
+       buf[cc] = '\0';
+
+       memcpy(hostname_str, buf, cc);
+       /*
+        * The last character is a newline; overwrite it
+        */
+       if (hostname_str[cc - 1] == '\n')
+               hostname_str[cc - 1] = 0;
+       else
+               hostname_str[cc] = 0;
+       free_page((unsigned long)buf);
+       return count;
+}
+
+static int xscore_proc_read_info(struct seq_file *m, void *data)
+{
+       seq_printf(m, "ULP services mask:\t\t0x%x\n", xcpm_resource_flags);
+       seq_printf(m, "Boot_flag:\t\t\t%d\n", boot_flag);
+       if (system_id_str[0])
+               seq_printf(m, "system_id:\t\t\t%s\n", system_id_str);
+       else
+               seq_puts(m, "system_id:\t\t\t<NULL>\n");
+       seq_printf(m, "HostName:\t\t\t%s\n", hostname_str);
+       if (os_version)
+               seq_printf(m, "OS version:\t\t\t%s\n", os_version);
+       if (os_arch)
+               seq_printf(m, "OS Arch:\t\t\t%s\n", os_arch);
+       return 0;
+}
+
+static int xscore_proc_open_info(struct inode *inode, struct file *file)
+{
+       return single_open(file, xscore_proc_read_info, PDE_DATA(inode));
+}
+
+static int xscore_proc_read_debug(struct seq_file *m, void *data)
+{
+       unsigned long tsecs = 0, tmins = 0, thrs = 0;
+
+       calc_time_fjiffies(xscore_wq_jiffies, &tsecs, &tmins, &thrs);
+
+       seq_printf(m, "Total wait time(secs): %ld\n", (xscore_wait_time / HZ));
+       seq_printf(m, "Debug Bit mask : 0x%x\n", xscore_debug);
+       seq_printf(m, "Force sm change : 0x%x\n", xscore_force_sm_change);
+       seq_printf(m, "Workqueue state : 0x%lx\n", xscore_wq_state);
+       seq_printf(m, "Last WQ(%lx) trigger time :\t%lu hrs %lu mins %lu seconds\n",
+                  xscore_last_wq, thrs, tmins, tsecs);
+
+       return 0;
+}
+
+static ssize_t xscore_proc_write_debug(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp)
+{
+       ssize_t ret = -EINVAL;
+       char    *buf;
+
+       if (!count || count >= PAGE_SIZE)
+               return -EINVAL;
+
+       buf = (char *) __get_free_page(GFP_USER);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user(buf, buffer, count))
+               goto out;
+       buf[count] = '\0';
+
+       /*
+        * The same value is parsed into both debug knobs
+        */
+       if (kstrtoint(buf, 0, &xscore_debug))
+               goto out;
+       if (kstrtoint(buf, 0, &xscore_force_sm_change))
+               goto out;
+       ret = count;
+out:
+       free_page((unsigned long)buf);
+       return ret;
+}
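+
+/*
+ * Usage sketch (illustrative value): the file takes a single integer,
+ * which is stored into both xscore_debug and xscore_force_sm_change:
+ *
+ *     echo 0x4 > /proc/driver/xscore/debug
+ */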
+
+static int xscore_proc_open_debug(struct inode *inode, struct file *file)
+{
+       return single_open(file, xscore_proc_read_debug, PDE_DATA(inode));
+}
+
+int xscore_create_procfs_entries(void)
+{
+       int ret = 0;
+       struct proc_dir_entry *file_d;
+
+       proc_root_xscore = proc_mkdir("driver/xscore", NULL);
+       if (!proc_root_xscore) {
+               pr_err("Unable to create /proc/driver/xscore\n");
+               return -ENOMEM;
+       }
+       file_d = proc_create_data("debug", S_IFREG, proc_root_xscore,
+                                 &xscore_debug_proc_fops, NULL);
+       if (!file_d) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/debug\n");
+               ret = -ENOMEM;
+               goto no_debug;
+       }
+
+       file_d = proc_create_data("info", S_IFREG, proc_root_xscore,
+                                 &xscore_info_proc_fops, NULL);
+       if (!file_d) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/info\n");
+               ret = -ENOMEM;
+               goto no_info;
+       }
+
+       file_d = proc_create_data("systemid", S_IFREG, proc_root_xscore,
+                                 &xscore_systemid_proc_fops, NULL);
+       if (!file_d) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/systemid\n");
+               ret = -ENOMEM;
+               goto no_systemid;
+       }
+
+       proc_root_xcpm = proc_mkdir("xcpm", proc_root_xscore);
+       if (!proc_root_xcpm) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/xcpm\n");
+               ret = -ENOMEM;
+               goto no_xcpm;
+       }
+
+       proc_root_xcpm_links = proc_mkdir("links", proc_root_xcpm);
+       if (!proc_root_xcpm_links) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/xcpm/links\n");
+               ret = -ENOMEM;
+               goto no_links;
+       }
+       proc_root_xcpm_ports = proc_mkdir("ports", proc_root_xcpm);
+       if (!proc_root_xcpm_ports) {
+               pr_err(PFX
+                      "Unable to create /proc/driver/xscore/xcpm/ports\n");
+               ret = -ENOMEM;
+               goto no_ports;
+       }
+       return 0;
+
+no_ports:
+       remove_proc_entry("links", proc_root_xcpm);
+no_links:
+       remove_proc_entry("xcpm", proc_root_xscore);
+no_xcpm:
+       remove_proc_entry("systemid", proc_root_xscore);
+no_systemid:
+       remove_proc_entry("info", proc_root_xscore);
+no_info:
+       remove_proc_entry("debug", proc_root_xscore);
+no_debug:
+       remove_proc_entry("driver/xscore", NULL);
+       return ret;
+}
+
+void xscore_remove_procfs_entries(void)
+{
+       remove_proc_entry("ports", proc_root_xcpm);
+       remove_proc_entry("links", proc_root_xcpm);
+       remove_proc_entry("xcpm", proc_root_xscore);
+       remove_proc_entry("systemid", proc_root_xscore);
+       remove_proc_entry("info", proc_root_xscore);
+       remove_proc_entry("debug", proc_root_xscore);
+       remove_proc_entry("driver/xscore", NULL);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_uadm.c b/drivers/infiniband/ulp/xsigo/xscore/xscore_uadm.c
new file mode 100644 (file)
index 0000000..11ac102
--- /dev/null
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the USPACE protocol
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/sched.h>
+#include <linux/poll.h>
+
+#include "xscore_priv.h"
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "xscore.h"
+
+#define        PFX     "UADM"
+
+static dev_t xscore_devt;
+static struct cdev xscore_cdev;
+static struct list_head read_list;
+static int xscore_svc_id = -1;
+struct mutex mut_lock;
+static unsigned long xscore_uadm_flags;
+static atomic_t list_count;
+static struct class *uadm_class;
+static DECLARE_WAIT_QUEUE_HEAD(read_wait);
+
+#define        XSCORE_UADM_OPEN        0x1
+
+#define        XSCORE_UADM_MAX_MSGS    256
+
+struct xscore_uadm_hdr {
+       u8 opcode;
+       int flags;
+       void *xsmp_hndl;
+};
+
+enum {
+       XSCORE_UADM_CHASSIS_MSG = 1,
+       XSCORE_UADM_REG_MSG,
+};
+
+struct xscore_uadm_msg {
+       struct list_head list;
+       struct xscore_uadm_hdr hdr;
+       void *msg;
+       int len;
+};
+
+/*
+ * Called from thread context
+ */
+void xscore_uadm_receive(void *xsmp_hndl, u8 *data, int len)
+{
+       struct xscore_uadm_msg *msg;
+       int err = 0;
+
+       mutex_lock(&mut_lock);
+       if (!xsigod_enable) {
+               err++;
+               goto out;
+       }
+       if (atomic_read(&list_count) > XSCORE_UADM_MAX_MSGS) {
+               UADM_ERROR("%s: receive Q full, dropping packet\n", __func__);
+               err++;
+               goto out;
+       }
+       msg = kmalloc(sizeof(*msg), GFP_KERNEL);
+       if (!msg) {
+               err++;
+               goto out;
+       }
+       msg->msg = data;
+       msg->hdr.xsmp_hndl = xsmp_hndl;
+       msg->hdr.flags = 0;
+       msg->hdr.opcode = XSCORE_UADM_CHASSIS_MSG;
+       msg->len = len;
+       list_add_tail(&msg->list, &read_list);
+       atomic_inc(&list_count);
+       wake_up_interruptible(&read_wait);
+out:
+       if (err)
+               kfree(data);
+       mutex_unlock(&mut_lock);
+}
+
+/*
+ * Called from thread context
+ */
+static void xscore_event_handler(void *xsmp_hndl, int event)
+{
+       mutex_lock(&mut_lock);
+       switch (event) {
+       default:
+               break;
+       }
+       mutex_unlock(&mut_lock);
+}
+
+static int xscore_uadm_register(void)
+{
+       struct xsmp_service_reg_info sinfo = {
+               .receive_handler = xscore_uadm_receive,
+               .event_handler = xscore_event_handler,
+               .ctrl_message_type = XSMP_MESSAGE_TYPE_USPACE,
+               .resource_flag_index = RESOURCE_FLAG_INDEX_USPACE
+       };
+       int ret = 0;
+
+       UADM_FUNCTION("%s:\n", __func__);
+       xscore_svc_id = xcpm_register_service(&sinfo);
+       if (xscore_svc_id < 0) {
+               UADM_ERROR("%s: xcpm_register_service failed %d\n",
+                          __func__, xscore_svc_id);
+               clear_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags);
+               return -ENODEV;
+       }
+
+       UADM_INFO("%s: Successful\n", __func__);
+       return ret;
+}
+
+static int xscore_uadm_open(struct inode *inode, struct file *file)
+{
+       int ret = 0;
+
+       if (test_and_set_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags)) {
+               UADM_ERROR("%s: Already open\n", __func__);
+               ret = -EBUSY;
+       }
+       return ret;
+}
+
+static int xscore_uadm_release(struct inode *inode, struct file *file)
+{
+       struct xscore_uadm_msg *msg, *tmsg;
+
+       mutex_lock(&mut_lock);
+       /* unregister service */
+       xcpm_unregister_service(xscore_svc_id);
+       xscore_svc_id = -1;
+       list_for_each_entry_safe(msg, tmsg, &read_list, list) {
+               list_del(&msg->list);
+               kfree(msg->msg);
+               kfree(msg);
+       }
+       clear_bit(XSCORE_UADM_OPEN, &xscore_uadm_flags);
+       mutex_unlock(&mut_lock);
+       UADM_INFO("%s: Successful\n", __func__);
+       return 0;
+}
+
+static unsigned int xscore_uadm_poll(struct file *file, poll_table *wait)
+{
+       unsigned int pollflags = 0;
+
+       poll_wait(file, &read_wait, wait);
+       mutex_lock(&mut_lock);
+       if (!list_empty(&read_list))
+               pollflags = POLLIN | POLLRDNORM;
+       mutex_unlock(&mut_lock);
+       return pollflags;
+}
+
+#define        HDR_LEN (sizeof(struct xscore_uadm_hdr))
+
+/*
+ * TODO: make this a blocking read later
+ */
+static ssize_t xscore_uadm_read(struct file *file, char __user *buf,
+                               size_t count, loff_t *ppos)
+{
+       int ret;
+       struct xscore_uadm_msg *msg;
+
+       if (count < HDR_LEN)
+               return -EINVAL;
+
+       mutex_lock(&mut_lock);
+       if (list_empty(&read_list)) {
+               ret = -ENODATA;
+               goto out;
+       }
+       msg = list_entry(read_list.next, struct xscore_uadm_msg, list);
+       list_del(&msg->list);
+       atomic_dec(&list_count);
+       ret = msg->len > (count - HDR_LEN) ? (count - HDR_LEN) : msg->len;
+       if (copy_to_user(buf, &msg->hdr, HDR_LEN) ||
+           copy_to_user(buf + HDR_LEN, msg->msg, ret))
+               ret = -EFAULT;
+       else
+               *ppos += (ret + HDR_LEN);
+       kfree(msg->msg);
+       kfree(msg);
+out:
+       mutex_unlock(&mut_lock);
+       return ret;
+}
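+
+/*
+ * Illustrative userspace sketch (not part of this module; "fd" is an
+ * open descriptor on the "kxsigod" device created in xscore_uadm_init()
+ * below). Each read() dequeues one message, laid out as a struct
+ * xscore_uadm_hdr followed by the payload; note that the return value
+ * counts only the payload bytes, with HDR_LEN header bytes preceding
+ * them in the buffer:
+ *
+ *     char rbuf[4096];
+ *     ssize_t n = read(fd, rbuf, sizeof(rbuf));
+ *     if (n >= 0) {
+ *             struct xscore_uadm_hdr *hdr = (struct xscore_uadm_hdr *)rbuf;
+ *             char *payload = rbuf + HDR_LEN;     // n payload bytes
+ *     }
+ */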
+
+static ssize_t xscore_uadm_write(struct file *file, const char __user *buf,
+                                size_t count, loff_t *ppos)
+{
+       void *msg = NULL;
+       int len;
+       struct xscore_uadm_hdr hdr;
+       int ret;
+
+       if (count < HDR_LEN)
+               return -EINVAL;
+
+       len = count - HDR_LEN;
+       if (len) {
+               msg = kmalloc(len, GFP_KERNEL);
+               if (!msg)
+                       return -ENOMEM;
+       }
+       mutex_lock(&mut_lock);
+       if (copy_from_user(&hdr, buf, HDR_LEN) ||
+           (len && copy_from_user(msg, buf + HDR_LEN, len))) {
+               UADM_ERROR("%s: copy_from_user error\n", __func__);
+               ret = -EFAULT;
+               kfree(msg);
+               goto out;
+       }
+       /*
+        * Check type of command and handle it accordingly
+        */
+       switch (hdr.opcode) {
+       case XSCORE_UADM_REG_MSG:
+               if (xscore_uadm_register())
+                       ret = -EBUSY;
+               else {
+                       ret = count;
+                       *ppos += count;
+               }
+               goto out;
+       default:
+               break;
+       }
+
+       ret = xcpm_send_message(hdr.xsmp_hndl, xscore_svc_id, msg, len);
+       if (ret) {
+               UADM_ERROR("%s: xcpm_send_message error  %d sess hndl: %p\n",
+                          __func__, ret, hdr.xsmp_hndl);
+               ret = -EINVAL;
+               kfree(msg);
+               goto out;
+       }
+       ret = count;
+       *ppos += count;
+out:
+       mutex_unlock(&mut_lock);
+       return ret;
+}
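+
+/*
+ * Illustrative write-side sketch (userspace, names hypothetical): the
+ * daemon first writes a bare header with opcode XSCORE_UADM_REG_MSG to
+ * register the USPACE service, then sends chassis messages as a header
+ * followed by the payload; hdr.xsmp_hndl is taken from a previously
+ * received message:
+ *
+ *     struct xscore_uadm_hdr hdr = { .opcode = XSCORE_UADM_REG_MSG };
+ *     write(fd, &hdr, sizeof(hdr));           // register first
+ *
+ *     hdr.opcode = 0;                         // any non-REG opcode
+ *     hdr.xsmp_hndl = hndl_from_rcvd_msg;
+ *     memcpy(wbuf, &hdr, HDR_LEN);
+ *     memcpy(wbuf + HDR_LEN, payload, plen);
+ *     write(fd, wbuf, HDR_LEN + plen);
+ */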
+
+static const struct file_operations xscore_fops = {
+       .open = xscore_uadm_open,
+       .release = xscore_uadm_release,
+       .read = xscore_uadm_read,
+       .write = xscore_uadm_write,
+       .poll = xscore_uadm_poll,
+       .owner = THIS_MODULE,
+};
+
+void xscore_uadm_destroy(void)
+{
+       device_destroy(uadm_class,
+                      MKDEV(MAJOR(xscore_devt), MINOR(xscore_devt)));
+       class_destroy(uadm_class);
+       cdev_del(&xscore_cdev);
+       unregister_chrdev_region(xscore_devt, 1);
+       mutex_destroy(&mut_lock);
+}
+
+int xscore_uadm_init(void)
+{
+       int result;
+
+       INIT_LIST_HEAD(&read_list);
+       mutex_init(&mut_lock);
+
+       result = alloc_chrdev_region(&xscore_devt, 0, 1, "kxsigod");
+       if (result) {
+               UADM_ERROR("%s: alloc_chrdev_region error %d\n", __func__,
+                          result);
+               mutex_destroy(&mut_lock);
+               return result;
+       }
+
+       cdev_init(&xscore_cdev, &xscore_fops);
+
+       result = cdev_add(&xscore_cdev, xscore_devt, 1);
+       if (result) {
+               UADM_ERROR("%s: cdev_add error %d\n", __func__, result);
+               unregister_chrdev_region(xscore_devt, 1);
+               mutex_destroy(&mut_lock);
+               return result;
+       }
+       uadm_class = class_create(THIS_MODULE, "kxsigod");
+       if (IS_ERR(uadm_class)) {
+               result = PTR_ERR(uadm_class);
+               UADM_ERROR("%s: class_create  error %d\n", __func__, result);
+               cdev_del(&xscore_cdev);
+               unregister_chrdev_region(xscore_devt, 1);
+               mutex_destroy(&mut_lock);
+               return result;
+       }
+       device_create(uadm_class, NULL,
+                     MKDEV(MAJOR(xscore_devt), MINOR(xscore_devt)), NULL,
+                     "kxsigod");
+       return 0;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_vpci.c b/drivers/infiniband/ulp/xsigo/xscore/xscore_vpci.c
new file mode 100644 (file)
index 0000000..e54b5a9
--- /dev/null
@@ -0,0 +1,245 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/sysfs.h>
+#include <linux/syscalls.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+
+#include "xs_compat.h"
+#include "xscore.h"
+
+int xscore_vpci_enable = 1;
+module_param(xscore_vpci_enable, int, 0644);
+
+#define PCI_VENDOR_ID_XSIGO            0x199d
+#define PCI_DEVICE_ID_XSIGO_VNIC       0x8209
+
+static struct pci_bus *vbus;
+static struct pci_sysdata *sysdata;
+
+static const struct pci_device_id xs_vpci_dev_table[] = {
+       {PCI_DEVICE(PCI_VENDOR_ID_XSIGO, PCI_DEVICE_ID_XSIGO_VNIC)},
+       {0}
+};
+
+MODULE_DEVICE_TABLE(pci, xs_vpci_dev_table);
+
+int xs_vpci_read(struct pci_bus *bus, unsigned int devfn, int where,
+                int size, u32 *val)
+{
+       switch (where) {
+       case PCI_VENDOR_ID:
+               /* our vendor/device id */
+               *val = PCI_VENDOR_ID_XSIGO | (PCI_DEVICE_ID_XSIGO_VNIC << 16);
+               break;
+       case PCI_COMMAND:
+               *val = 0;
+               break;
+       case PCI_HEADER_TYPE:
+               *val = PCI_HEADER_TYPE_NORMAL;
+               break;
+       case PCI_STATUS:
+               *val = 0;
+               break;
+       case PCI_CLASS_REVISION:
+               *val = (2 << 24) | (0 << 16) | 1;
+               /* network class, ethernet controller, revision 1 */
+               break;
+       case PCI_INTERRUPT_PIN:
+               *val = 0;
+               break;
+       case PCI_SUBSYSTEM_VENDOR_ID:
+               *val = 0;
+               break;
+       case PCI_SUBSYSTEM_ID:
+               *val = 0;
+               break;
+       default:
+               *val = 0;
+               /* sensible default */
+       }
+       return 0;
+}
+
+int xs_vpci_write(struct pci_bus *bus, unsigned int devfn, int where,
+                 int size, u32 val)
+{
+       switch (where) {
+       case PCI_BASE_ADDRESS_0:
+       case PCI_BASE_ADDRESS_1:
+       case PCI_BASE_ADDRESS_2:
+       case PCI_BASE_ADDRESS_3:
+       case PCI_BASE_ADDRESS_4:
+       case PCI_BASE_ADDRESS_5:
+               break;
+       }
+       return 0;
+}
+
+struct pci_ops xs_vpci_ops = {
+       .read = xs_vpci_read,
+       .write = xs_vpci_write
+};
+
+struct pci_dev *xs_vpci_prep_vnic(struct net_device *netdev, char *vnic_name,
+                                 int devn)
+{
+       struct pci_dev *pcidev = NULL;
+       /* netdev->ifindex is always zero before registration
+        * on rhel5 kernels
+        */
+
+       if (!boot_flag || vbus == NULL)
+               return NULL;
+
+       pcidev = pci_scan_single_device(vbus, devn);
+
+       if (pcidev == NULL)
+               return NULL;
+
+       pci_dev_get(pcidev);
+
+       pci_bus_add_devices(vbus);
+       SET_NETDEV_DEV(netdev, &pcidev->dev);
+       return pcidev;
+}
+EXPORT_SYMBOL(xs_vpci_prep_vnic);
+
+void *xs_vpci_add_vnic(char *vnic_name, int devn)
+{
+       struct pci_dev *pcidev;
+       struct net_device *netdev;
+       int ret;
+
+       if (vbus == NULL)
+               return NULL;
+       pcidev = pci_scan_single_device(vbus, devn);
+       if (pcidev == NULL)
+               return NULL;
+
+       pci_dev_get(pcidev);
+       /*
+        * Better to use the compat layer, but since this is Citrix
+        * specific for now, use the LINUX version magic
+        */
+       netdev = dev_get_by_name(&init_net, vnic_name);
+       if (netdev == NULL) {
+               pci_dev_put(pcidev);
+               return NULL;
+       }
+       pci_bus_add_device(pcidev);
+
+       ret = sysfs_create_link(&netdev->dev.kobj, &pcidev->dev.kobj, "device");
+       if (ret) {
+               pci_stop_and_remove_bus_device(pcidev);
+               dev_put(netdev);
+               pci_dev_put(pcidev);
+               pcidev = NULL;
+       }
+       return pcidev;
+}
+EXPORT_SYMBOL(xs_vpci_add_vnic);
+
+void xs_vpci_remove_vnic(struct net_device *netdev, void *hndl)
+{
+       struct pci_dev *pcidev = hndl;
+
+       if (vbus == NULL)
+               return;
+       if (!boot_flag) {
+               sysfs_remove_link(&netdev->dev.kobj, "device");
+               dev_put(netdev);
+       }
+       pci_stop_and_remove_bus_device(pcidev);
+       pci_dev_put(pcidev);
+}
+EXPORT_SYMBOL(xs_vpci_remove_vnic);
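+
+/*
+ * Usage sketch for a vNIC driver (names hypothetical): after creating
+ * its net_device it can attach a virtual PCI parent, and must detach it
+ * again on teardown:
+ *
+ *     void *hndl = xs_vpci_add_vnic(netdev->name, devn);
+ *     ...
+ *     if (hndl)
+ *             xs_vpci_remove_vnic(netdev, hndl);
+ */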
+
+void xs_vpci_vdev_remove(struct pci_dev *dev)
+{
+}
+
+static struct pci_driver xs_vpci_vdev_driver = {
+       .name = "Xsigo-Virtual-NIC",
+       .id_table = xs_vpci_dev_table,
+       .remove = xs_vpci_vdev_remove
+};
+
+int xs_vpci_bus_init(void)
+{
+       int i = 100;
+
+       if (!xscore_vpci_enable)
+               return 0;
+
+       sysdata = kzalloc(sizeof(*sysdata), GFP_KERNEL);
+       if (!sysdata)
+               return -ENOMEM;
+       while (i > 0) {
+               vbus = pci_scan_bus_parented(NULL, i, &xs_vpci_ops, sysdata);
+               if (vbus != NULL)
+                       break;
+               memset(sysdata, 0, sizeof(*sysdata));
+               i--;
+       }
+       if (vbus == NULL) {
+               kfree(sysdata);
+               return -EINVAL;
+       }
+       if (pci_register_driver(&xs_vpci_vdev_driver) < 0) {
+               pci_remove_bus(vbus);
+               vbus = NULL;
+               return -EINVAL;
+       }
+       return 0;
+}
+
+void xs_vpci_bus_remove(void)
+{
+       if (vbus) {
+               pci_unregister_driver(&xs_vpci_vdev_driver);
+               device_unregister(vbus->bridge);
+               pci_remove_bus(vbus);
+               kfree(sysdata);
+               vbus = NULL;
+       }
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xscore_xds.h b/drivers/infiniband/ulp/xsigo/xscore/xscore_xds.h
new file mode 100644 (file)
index 0000000..b2d4886
--- /dev/null
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSCORE_XDS_H__
+#define __XSCORE_XDS_H__
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+
+#define XCM_REC_VERSION 1
+#define MAX_XCFM_COUNT 8
+
+#define XSIGO_MGMT_CLASS        0x0B
+#define XSIGO_MGMT_CLASS_VERSION 0x02
+
+#define IB_MAD_ATTR_XCM_REQUEST         0xB002
+
+#define XSIGO_MGMT_METHOD_GET  IB_MGMT_METHOD_GET
+#define XSIGO_MGMT_METHOD_SET  IB_MGMT_METHOD_SET
+
+#define XSIGO_MAX_HOSTNAME             65
+#define XSIGO_MAX_OS_VERSION_LEN       32
+#define XSIGO_MAX_OS_ARCH_LEN          16
+#define XSIGO_MAX_BUILD_VER_LEN                16
+
+struct xcfm_record {
+       u64 port_id;
+       u16 xcm_lid;            /* lid of the XCM port */
+       u8 reserved[10];
+} __packed;
+
+struct xcm_list {
+       u8 count;
+       u8 xcm_version;
+       u8 reserved[2];
+       struct xcfm_record xcms[MAX_XCFM_COUNT];
+};
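+
+/*
+ * A minimal sketch of walking an XCM list returned by an XDS query
+ * (assuming "list" was filled in by the reply; use_xcm() is
+ * hypothetical):
+ *
+ *     u8 i;
+ *     for (i = 0; i < list.count && i < MAX_XCFM_COUNT; i++)
+ *             use_xcm(list.xcms[i].port_id, list.xcms[i].xcm_lid);
+ */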
+
+struct server_info {
+       u32 vm_id;
+       u64 port_id;
+} __packed;
+
+struct xds_request {
+       struct server_info server_record;
+       char hostname[XSIGO_MAX_HOSTNAME];
+       char os_version[XSIGO_MAX_OS_VERSION_LEN];
+       char os_arch[XSIGO_MAX_OS_ARCH_LEN];
+       uint32_t os_type;
+       uint64_t fw_version;
+       uint32_t hw_version;
+       uint32_t driver_version;
+       uint64_t system_id_l;
+       uint64_t system_id_h;
+       uint32_t reserved;      /* For sending capabilities */
+       char build_version[XSIGO_MAX_BUILD_VER_LEN];
+} __packed;
+
+struct ib_xds_mad {
+       struct ib_mad_hdr mad_hdr;
+       u8 reserved[IB_MGMT_SA_HDR - IB_MGMT_MAD_HDR];
+       u8 data[IB_MGMT_SA_DATA];
+} __packed;
+
+/* Discovery solicitation packet.
+ *      Sent by server as mcast request to all chassis.  (xds_request)
+ *      Sent by chassis as unicast response to server.   (xcm_rsp_msg_t)
+ */
+#define XDP_MSG_TYPE_DISC_SOL 0x1
+
+#define XDP_FLAGS_REQ 0x1
+#define XDP_FLAGS_RSP 0x2
+
+struct xdp_hdr {
+       uint16_t type;
+       uint16_t len;
+       uint16_t flags;
+       uint32_t reserved1;
+       uint32_t reserved2;
+       uint32_t chksum;
+} __packed;
+
+struct xdds_disc_req {
+       struct xdp_hdr xhdr;
+       struct xds_request req;
+} __packed;
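+
+/*
+ * Sketch of filling a discovery solicitation as described above
+ * (illustrative; checksum and byte-order handling omitted):
+ *
+ *     struct xdds_disc_req dreq;
+ *
+ *     memset(&dreq, 0, sizeof(dreq));
+ *     dreq.xhdr.type  = XDP_MSG_TYPE_DISC_SOL;
+ *     dreq.xhdr.flags = XDP_FLAGS_REQ;
+ *     dreq.xhdr.len   = sizeof(dreq);
+ *     strncpy(dreq.req.hostname, hostname, XSIGO_MAX_HOSTNAME - 1);
+ */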
+
+struct xdp_info {
+#define XDP_FABRIC_MTU_1K 0
+#define XDP_FABRIC_MTU_2K 1
+#define XDP_FABRIC_MTU_4K 2
+       uint8_t fabric_mtu;
+       uint8_t xsmp_vlan;
+       uint8_t xsmp_cos;
+       uint8_t resv1;
+       uint32_t reserved[63];
+} __packed;
+
+struct xdds_work {
+       struct work_struct work;
+       u8 *msg;
+       int msg_len;
+       struct xscore_port *port;
+};
+
+#endif /*__XSCORE_XDS_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsmp.c b/drivers/infiniband/ulp/xsigo/xscore/xsmp.c
new file mode 100644 (file)
index 0000000..c7645a7
--- /dev/null
@@ -0,0 +1,1282 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * This file implements the XSMP protocol
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+
+#include "xscore_priv.h"
+#include "xscore.h"
+#include "xs_versions.h"
+#include "xsmp.h"
+#include "xs_compat.h"
+
+#define        MAX_XSMP_MSG_SIZE       1024
+
+#define XSMP_SERVICE_ID                0x02139701
+
+#define        QUEUE_CONN_DELAY        (1000 * 10)
+
+struct xsmp_work {
+       struct work_struct work;
+       struct xsmp_ctx *xsmp_ctx;
+       void *msg;
+       int len;
+       int status;
+};
+
+static struct list_head gxsmp_list;
+static struct idr xsmp_id_table;
+static spinlock_t xsmp_glob_lock;
+u32 xcpm_resource_flags;
+unsigned long xscore_wait_time;
+/*
+ * This mutex is used to protect the service structure
+ */
+struct mutex svc_mutex;
+struct mutex xsmp_mutex;
+
+/*
+ * xscore_wait_in_boot also controls the boot-time wait for vnics/vhbas.
+ * Disable this on ESX, OVM, Citrix, etc.
+ */
+int boot_flag = 1;
+int xscore_wait_in_boot = 1;
+module_param(boot_flag, int, 0444);
+module_param(xscore_wait_in_boot, int, 0644);
+
+int xscore_handle_hello_msg;
+module_param(xscore_handle_hello_msg, int, 0444);
+
+int xsigod_enable;
+module_param(xsigod_enable, int, 0444);
+
+static int xsmp_ring_size = 256;
+module_param(xsmp_ring_size, int, 0644);
+static int xscore_sess_wait_time = 600;
+module_param(xscore_sess_wait_time, int, 0644);
+
+#define        MAX_NUM_SVCS            XSMP_MESSAGE_TYPE_MAX
+
+static struct xsmp_service_reg_info xcpm_services[MAX_NUM_SVCS];
+
+static void xsmp_cleanup_session(struct xsmp_ctx *ctx);
+static int xsmp_session_create(struct xscore_port *port, u64 dguid, u16 dlid);
+static int xsmp_send_resource_list(struct xsmp_ctx *ctx, u32 rflags);
+static int xsmp_sess_disconnect(struct xsmp_ctx *xsmp_ctx);
+static void notify_ulp(struct xsmp_ctx *ctx, int evt);
+
+static struct xsmp_ctx *xsmp_get_ctx(void *cookie)
+{
+       int idr = (int)(unsigned long)cookie;
+       struct xsmp_ctx *ctx;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xsmp_glob_lock, flags);
+       ctx = idr_find(&xsmp_id_table, idr);
+       if (!ctx) {
+               spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+               return NULL;
+       }
+       /*
+        * Increment reference count
+        */
+       atomic_inc(&ctx->ref_cnt);
+       spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+       return ctx;
+}
+
+static void xsmp_put_ctx(struct xsmp_ctx *ctx)
+{
+       atomic_dec(&ctx->ref_cnt);
+}
+
+void xsmp_ulp_notify(struct xscore_port *port, int port_up)
+{
+       struct xsmp_ctx *xsmp_ctx;
+
+       mutex_lock(&xsmp_mutex);
+       list_for_each_entry(xsmp_ctx, &port->xsmp_list, list) {
+               if (port_up)
+                       clear_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags);
+               else {
+                       set_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags);
+                       clear_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+                       clear_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags);
+                       xsmp_ctx->state = XSMP_SESSION_ERROR;
+               }
+               notify_ulp(xsmp_ctx,
+                          port_up ? XSCORE_PORT_UP : XSCORE_PORT_DOWN);
+       }
+       mutex_unlock(&xsmp_mutex);
+}
+
+void xsmp_allocate_xsmp_session(struct xscore_port *port, u64 dguid, u16 dlid)
+{
+       struct xsmp_ctx *xsmp_ctx;
+       int found = 0;
+
+       XSMP_FUNCTION("%s dguid: 0x%llx, dlid: 0x%x\n", __func__, dguid, dlid);
+
+       /*
+        * Grab the xsmp mutex. This protects the xsmp list from 3 different
+        * threads:
+        * 1. The port workq through which xsmp session add/delete happens
+        * 2. An rmmod thread (module unload, when the user issues rmmod)
+        * 3. A ULP attaching to the XSMP layer (session update list) or
+        *    detaching, which happens when xsvnic/xsvhba/uadm is
+        *    loaded/unloaded
+        */
+       mutex_lock(&xsmp_mutex);
+
+       list_for_each_entry(xsmp_ctx, &port->xsmp_list, list) {
+               if (xsmp_ctx->dguid == dguid && port == xsmp_ctx->port) {
+                       /*
+                        * We saw the IO director from the same port
+                        * (dguid + port)
+                        * Now check if we have a LID change
+                        */
+                       if (dlid != xsmp_ctx->dlid) {
+                               XSMP_PRINT
+                                   ("IO Director %s (GUID: 0x%llx) LID changed ",
+                                    xsmp_ctx->chassis_name, xsmp_ctx->dguid);
+                               XSMP_PRINT("from 0x%x to 0x%x on port: 0x%llx\n",
+                                          xsmp_ctx->dlid, dlid, port->guid);
+                               /*
+                                * The connection will get torn down and
+                                * reconnect back because of hello timeout
+                                */
+                               xsmp_ctx->dlid = dlid;
+                               xsmp_ctx->conn_ctx.dlid = dlid;
+                       }
+                       found++;
+                       break;
+               }
+       }
+       /*
+        * Did not find an entry, now start an XSMP session
+        * Need to be called in non-irq context
+        */
+       if (!found)
+               xsmp_session_create(port, dguid, dlid);
+
+       mutex_unlock(&xsmp_mutex);
+}
+
+void xsmp_cleanup_stale_xsmp_sessions(struct xscore_port *port, int force)
+{
+       struct xsmp_ctx *xsmp_ctx, *tmp;
+
+       XSMP_FUNCTION("%s:\n", __func__);
+
+       /*
+        *  Protect list from rmmod thread/port wq and ULP register/unregister
+        */
+       mutex_lock(&xsmp_mutex);
+
+       list_for_each_entry_safe(xsmp_ctx, tmp, &port->xsmp_list, list) {
+               if (force || test_bit(XSMP_DELETE_BIT, &xsmp_ctx->flags)) {
+                       XSMP_PRINT("Deleted XSMP session %s : %s (0x%llx)\n",
+                                  xsmp_ctx->session_name,
+                                  xsmp_ctx->chassis_name, xsmp_ctx->dguid);
+                       /*
+                        * If we are in force mode, notify ULP's that either
+                        * 1. module is going away
+                        * 2. or underlying hardware driver is going away
+                        */
+                       if (force)
+                               notify_ulp(xsmp_ctx, XSCORE_DEVICE_REMOVAL);
+                       xsmp_cleanup_session(xsmp_ctx);
+               }
+       }
+
+       mutex_unlock(&xsmp_mutex);
+}
+
+/*
+ * Walks the global XSMP session list under xsmp_mutex
+ */
+static int xsmp_send_resource_list_update(void)
+{
+       struct xsmp_ctx *xsmp_ctx;
+
+       mutex_lock(&xsmp_mutex);
+       list_for_each_entry(xsmp_ctx, &gxsmp_list, glist) {
+               xsmp_ctx->counters[XSMP_RES_LIST_COUNTER]++;
+               xsmp_send_resource_list(xsmp_ctx, xcpm_resource_flags);
+       }
+       mutex_unlock(&xsmp_mutex);
+       return 0;
+}
+
+int xcpm_register_service(struct xsmp_service_reg_info *s_info)
+{
+       struct xsmp_service_reg_info *sp;
+       int i = s_info->ctrl_message_type;
+
+       if (i < 1 || i >= MAX_NUM_SVCS)
+               return -EINVAL;
+
+       sp = &xcpm_services[i];
+       /*
+        * Check for duplicate entries
+        */
+       mutex_lock(&svc_mutex);
+       if (sp->svc_state == SVC_STATE_UP) {
+               mutex_unlock(&svc_mutex);
+               return i;
+       }
+       sp->ctrl_message_type = s_info->ctrl_message_type;
+       sp->resource_flag_index = s_info->resource_flag_index;
+       sp->receive_handler = s_info->receive_handler;
+       sp->event_handler = s_info->event_handler;
+       sp->callout_handler = s_info->callout_handler;
+       sp->svc_state = SVC_STATE_UP;
+       /*
+        * Kick start sending the resource list to the remote end
+        */
+       xcpm_resource_flags |= (1 << sp->resource_flag_index);
+       xsmp_send_resource_list_update();
+       mutex_unlock(&svc_mutex);
+       return i;
+}
+EXPORT_SYMBOL(xcpm_register_service);
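+
+/*
+ * A registration sketch modeled on xscore_uadm_register() in
+ * xscore_uadm.c: a ULP fills in its message type, resource-flag index
+ * and handlers, then registers (my_receive/my_event are hypothetical):
+ *
+ *     struct xsmp_service_reg_info sinfo = {
+ *             .receive_handler     = my_receive,
+ *             .event_handler       = my_event,
+ *             .ctrl_message_type   = XSMP_MESSAGE_TYPE_USPACE,
+ *             .resource_flag_index = RESOURCE_FLAG_INDEX_USPACE,
+ *     };
+ *     int svc_id = xcpm_register_service(&sinfo);     // < 0 on failure
+ */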
+
+static int xcpm_send_msg_client(struct xsmp_ctx *xsmp_ctx, int svc_id,
+                               void *msg, int len)
+{
+       int ret = -ENOTCONN;
+       struct xsmp_service_reg_info *sp = &xcpm_services[svc_id];
+
+       mutex_lock(&svc_mutex);
+       if (sp->svc_state == SVC_STATE_UP && sp->receive_handler) {
+               atomic_inc(&sp->ref_cnt);
+               mutex_unlock(&svc_mutex);
+               sp->receive_handler((void *) (unsigned long)
+                                   xsmp_ctx->idr, msg, len);
+               ret = 0;
+               atomic_dec(&sp->ref_cnt);
+       } else
+               mutex_unlock(&svc_mutex);
+       return ret;
+}
+
+int xcpm_send_msg_xsigod(void *xsmp_hndl, void *msg, int len)
+{
+       struct xsmp_ctx *ctx;
+       int ret;
+
+       ctx = xsmp_get_ctx(xsmp_hndl);
+       if (!ctx)
+               return -EINVAL;
+
+       if (xcpm_resource_flags & (1 << RESOURCE_FLAG_INDEX_USPACE))
+               ret =
+                   xcpm_send_msg_client(ctx, XSMP_MESSAGE_TYPE_USPACE, msg,
+                                        len);
+       else {
+               xscore_uadm_receive(xsmp_hndl, msg, len);
+               ret = 0;
+       }
+
+       xsmp_put_ctx(ctx);
+       return ret;
+}
+EXPORT_SYMBOL(xcpm_send_msg_xsigod);
+
+int xcpm_unregister_service(int service_id)
+{
+       struct xsmp_service_reg_info *sp = &xcpm_services[service_id];
+
+       mutex_lock(&svc_mutex);
+       if (sp->svc_state == SVC_STATE_UP) {
+               sp->svc_state = SVC_STATE_DOWN;
+               mutex_unlock(&svc_mutex);
+               while (atomic_read(&sp->ref_cnt))
+                       msleep(20);
+               xcpm_resource_flags &= ~(1 << sp->resource_flag_index);
+               /*
+                * Send updated list
+                */
+               xsmp_send_resource_list_update();
+       } else
+               mutex_unlock(&svc_mutex);
+       return 0;
+}
+EXPORT_SYMBOL(xcpm_unregister_service);
+
+void *xcpm_alloc_msg(int sz)
+{
+       return kmalloc(sz, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(xcpm_alloc_msg);
+
+void xcpm_free_msg(void *msg)
+{
+       kfree(msg);
+}
+EXPORT_SYMBOL(xcpm_free_msg);
+
+int xcpm_is_xsigod_enabled(void)
+{
+       return xsigod_enable;
+}
+EXPORT_SYMBOL(xcpm_is_xsigod_enabled);
+
+static inline void change_header_byte_order(struct xsmp_message_header
+                                           *m_header)
+{
+       m_header->length = cpu_to_be16(m_header->length);
+       m_header->seq_number = cpu_to_be32(m_header->seq_number);
+       m_header->source_id.node_id_primary =
+           cpu_to_be64(m_header->source_id.node_id_primary);
+       m_header->dest_id.node_id_primary =
+           cpu_to_be64(m_header->dest_id.node_id_primary);
+}
+
+static inline void change_session_byte_order(struct xsmp_session_msg *m_session)
+{
+       m_session->length = cpu_to_be16(m_session->length);
+       m_session->resource_flags = cpu_to_be32(m_session->resource_flags);
+       m_session->version = cpu_to_be32(m_session->version);
+       m_session->chassis_version = cpu_to_be32(m_session->chassis_version);
+       m_session->boot_flags = cpu_to_be32(m_session->boot_flags);
+       m_session->fw_ver = cpu_to_be64(m_session->fw_ver);
+       m_session->hw_ver = cpu_to_be32(m_session->hw_ver);
+       m_session->vendor_part_id = cpu_to_be32(m_session->vendor_part_id);
+}
+
+int xcpm_get_xsmp_session_info(void *xsmp_hndl,
+                              struct xsmp_session_info *ip)
+{
+       struct xsmp_ctx *ctx;
+
+       ctx = xsmp_get_ctx(xsmp_hndl);
+       if (!ctx)
+               return -EINVAL;
+
+       strncpy(ip->chassis_name, ctx->chassis_name,
+               sizeof(ip->chassis_name) - 1);
+       ip->chassis_name[sizeof(ip->chassis_name) - 1] = 0;
+       strncpy(ip->session_name, ctx->session_name,
+               sizeof(ip->session_name) - 1);
+       ip->session_name[sizeof(ip->session_name) - 1] = 0;
+       ip->version = ctx->xsigo_xsmp_version;
+       ip->port = ctx->port;
+       ip->ib_device = ctx->port->xs_dev->device;
+       ip->dma_device = ctx->port->xs_dev->device->dma_device;
+       ip->pd = ctx->port->xs_dev->pd;
+       ip->mr = ctx->port->xs_dev->mr;
+       ip->is_shca = ctx->port->xs_dev->is_shca;
+       ip->dguid = ctx->dguid;
+       xsmp_put_ctx(ctx);
+       return 0;
+}
+EXPORT_SYMBOL(xcpm_get_xsmp_session_info);
+
+int xcpm_check_duplicate_names(void *xsmp_hndl, char *name, u8 svc_id)
+{
+       int ret = 0;
+       struct xsmp_service_reg_info *sp = &xcpm_services[svc_id];
+       struct net_device *chk_netdev;
+
+       if (strcmp(name, VMWARE_RESERVED_KEYS) == 0) {
+               pr_err("%s: %s is not a supported vnic name (it is a reserved keyword for esx5.0)\n",
+                      __func__, name);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       chk_netdev = dev_get_by_name(&init_net, name);
+       if (chk_netdev != NULL) {
+               ret = -EINVAL;
+               pr_info("%s: !!Warning!! NIC %s is already present in system\n",
+                       __func__, name);
+               dev_put(chk_netdev);
+               goto out;
+       }
+
+       mutex_lock(&svc_mutex);
+       if (sp->svc_state == SVC_STATE_UP && sp->callout_handler) {
+               atomic_inc(&sp->ref_cnt);
+               mutex_unlock(&svc_mutex);
+               ret = sp->callout_handler(name);
+               atomic_dec(&sp->ref_cnt);
+       } else
+               mutex_unlock(&svc_mutex);
+out:
+       return ret;
+}
+EXPORT_SYMBOL(xcpm_check_duplicate_names);
+
+int xcpm_send_message(void *hndl, int svc_id, u8 *msg, int len)
+{
+       unsigned long flags;
+       struct xsmp_ctx *ctx;
+       int ret;
+       struct xsmp_message_header *m_header;
+
+       m_header = (struct xsmp_message_header *)msg;
+
+       ctx = xsmp_get_ctx(hndl);
+       if (!ctx)
+               return -EINVAL;
+       /*
+        * Now check state of XSMP
+        */
+       spin_lock_irqsave(&ctx->lock, flags);
+       if (ctx->state != XSMP_SESSION_CONNECTED) {
+               ctx->counters[XSMP_SESSION_CONN_DOWN_COUNTER]++;
+               spin_unlock_irqrestore(&ctx->lock, flags);
+               xsmp_put_ctx(ctx);
+               return -ENOTCONN;
+       }
+       /*
+        * Fix sequence number and GUID
+        */
+       m_header->seq_number = cpu_to_be32(ctx->seq_number++);
+       m_header->source_id.node_id_primary = cpu_to_be64(ctx->port->guid);
+       m_header->source_id.node_id_aux = 0;
+       m_header->dest_id.node_id_aux = 0;
+       m_header->dest_id.node_id_primary = cpu_to_be64(ctx->dguid);
+       ret =
+           xscore_post_send(&ctx->conn_ctx, m_header, len,
+                            XSCORE_DEFER_PROCESS);
+       ctx->counters[XSMP_TOTAL_MSG_SENT_COUNTER]++;
+       switch (svc_id) {
+       case XSMP_MESSAGE_TYPE_VNIC:
+               ctx->counters[XSMP_VNIC_MESSAGE_SENT_COUNTER]++;
+               break;
+       case XSMP_MESSAGE_TYPE_VHBA:
+               ctx->counters[XSMP_VHBA_MESSAGE_SENT_COUNTER]++;
+               break;
+       case XSMP_MESSAGE_TYPE_USPACE:
+               ctx->counters[XSMP_USPACE_MESSAGE_SENT_COUNTER]++;
+               break;
+       case XSMP_MESSAGE_TYPE_XVE:
+               ctx->counters[XSMP_XVE_MESSAGE_SENT_COUNTER]++;
+               break;
+       default:
+               break;
+       }
+       if (ret) {
+               if (ret == -ENOBUFS)
+                       ctx->counters[XSMP_SESSION_RING_FULL_COUNTER]++;
+               else
+                       ctx->counters[XSMP_SESSION_SEND_ERROR_COUNTER]++;
+       }
+       spin_unlock_irqrestore(&ctx->lock, flags);
+       xsmp_put_ctx(ctx);
+       return ret;
+}
+EXPORT_SYMBOL(xcpm_send_message);
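+
+/*
+ * Send-side sketch (fill-in hypothetical): a ULP allocates with
+ * xcpm_alloc_msg(), sets the header type and length plus its payload,
+ * and lets xcpm_send_message() stamp the sequence number and GUIDs:
+ *
+ *     struct xsmp_message_header *h = xcpm_alloc_msg(len);
+ *
+ *     if (h) {
+ *             h->type = XSMP_MESSAGE_TYPE_VNIC;
+ *             h->length = len;        // byte order per the ULP protocol
+ *             ... ULP payload follows the header ...
+ *             if (xcpm_send_message(xsmp_hndl, XSMP_MESSAGE_TYPE_VNIC,
+ *                                   (u8 *)h, len))
+ *                     xcpm_free_msg(h);       // send failed
+ *     }
+ */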
+
+/*
+ * Two XSMP sessions are considered to "match" (i.e. to be the
+ * same logical communication path) if the remote (destination) GUID
+ * and the session name (aka server profile name) are identical.
+ * GUIDs are by definition unique and there is a requirement
+ * that each server profile name on a given chassis be unique.
+ */
+int xsmp_sessions_match(struct xsmp_session_info *infop, void *cookie)
+{
+       struct xsmp_ctx *ctx;
+       int rc;
+
+       ctx = xsmp_get_ctx(cookie);
+       if (!ctx)
+               return 0;
+       rc = ((infop->dguid == ctx->dguid)
+             && (strncmp(infop->session_name, ctx->session_name,
+                         SESSION_NAME_LEN) == 0));
+       xsmp_put_ctx(ctx);
+       return rc;
+}
+EXPORT_SYMBOL(xsmp_sessions_match);
+
+void xscore_wait_for_link_up(void)
+{
+       struct xscore_port *port;
+       int time, delayms = 1000;
+       int timeoutsecs = 90;
+       struct ib_port_attr port_attr;
+       int all_up;
+
+       for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+               all_up = 1;
+               mutex_lock(&xscore_port_mutex);
+               list_for_each_entry(port, &xscore_port_list, gport_list) {
+                       (void)ib_query_port(port->xs_dev->device,
+                                           port->port_num, &port_attr);
+                       if (port_attr.state != IB_PORT_ACTIVE) {
+                               all_up = 0;
+                               continue;
+                       }
+               }
+               mutex_unlock(&xscore_port_mutex);
+               if (all_up)
+                       break;
+               msleep(delayms);
+       }
+}
+
+void xscore_wait_for_xds_resp(void)
+{
+       struct xscore_port *port;
+       int time, delayms = 1000;
+       int timeoutsecs = 30;
+       struct ib_port_attr port_attr;
+       int all_ok;
+
+       for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+               all_ok = 1;
+               mutex_lock(&xscore_port_mutex);
+               list_for_each_entry(port, &xscore_port_list, gport_list) {
+                       (void)ib_query_port(port->xs_dev->device,
+                                           port->port_num, &port_attr);
+                       if (port_attr.state != IB_PORT_ACTIVE)
+                               continue;
+                       /*
+                        * Check if XDS bit is set
+                        */
+                       if (!test_bit(XSCORE_SP_PRESENT, &port->flags)
+                           && !test_bit(XSCORE_SP_NOT_PRESENT, &port->flags))
+                               all_ok = 0;
+               }
+               mutex_unlock(&xscore_port_mutex);
+               if (all_ok)
+                       break;
+               msleep(delayms);
+       }
+}
+
+/*
+ * Used by the xsigoboot driver to verify that all XSMP sessions are up
+ */
+int xsmp_sessions_up(void)
+{
+       struct xsmp_ctx *xsmp_ctx;
+       int n = 0;
+
+       mutex_lock(&xsmp_mutex);
+       if (list_empty(&gxsmp_list)) {
+               /*
+                * If the XSMP list is empty, treat all sessions as up
+                */
+               n = 1;
+               goto out;
+       }
+       list_for_each_entry(xsmp_ctx, &gxsmp_list, glist) {
+               if (xsmp_ctx->state != XSMP_SESSION_CONNECTED) {
+                       n = 0;
+                       break;
+               }
+               n++;
+       }
+out:
+       mutex_unlock(&xsmp_mutex);
+       return n > 0;
+}
+
+/*
+ * wait for the XSMP sessions to come up.
+ */
+int xscore_wait_for_sessions(u8 cal_time)
+{
+       unsigned long init_time;
+       int time, ret = 0, delayms = 1000;
+       int timeoutsecs = xscore_sess_wait_time;
+
+       init_time = jiffies;
+
+       if (!xscore_wait_in_boot)
+               goto out;
+
+       if (cal_time)
+               pr_info("XSCORE: Waiting for XSMP Session to come up .....\n");
+       else {
+               mutex_lock(&xsmp_mutex);
+               if (list_empty(&gxsmp_list))
+                       ret = 0;
+               else
+                       ret = 1;
+               mutex_unlock(&xsmp_mutex);
+               return ret;
+       }
+
+       xscore_wait_for_link_up();
+
+       xscore_wait_for_xds_resp();
+
+       for (time = 0; time < timeoutsecs * 1000; time += delayms) {
+               if (xsmp_sessions_up()) {
+                       XSMP_INFO("XSMP Sessions are up\n");
+                       ret = delayms;
+                       goto out;
+               }
+               msleep(delayms);
+               XSMP_INFO("Waiting for XSMP Session to be up\n");
+       }
+       XSMP_INFO("XSMP Sessions are not up\n");
+
+out:
+       if (cal_time)
+               xscore_wait_time = jiffies - init_time;
+       return ret;
+}
+EXPORT_SYMBOL(xscore_wait_for_sessions);
+
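+/*
+ * Build and send a single session-class XSMP message (REGISTER, HELLO,
+ * RESOURCE_LIST or SHUTDOWN).  On success the buffer is freed by the
+ * send completion handler; on failure it is freed here.
+ */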
+static int send_xsmp_sess_msg(struct xsmp_ctx *ctxp, u8 type, u32 rflags)
+{
+       struct xsmp_session_msg *m_session;
+       struct xsmp_message_header *m_header;
+       unsigned long flags;
+       int ret = 0;
+       int len;
+
+       m_header = kmalloc(MAX_XSMP_MSG_SIZE, GFP_ATOMIC);
+       if (!m_header)
+               return -ENOMEM;
+       spin_lock_irqsave(&ctxp->lock, flags);
+       if (ctxp->state < XSMP_SESSION_TPT_CONNECTED
+           || ctxp->state > XSMP_SESSION_CONNECTED) {
+               ret = -ENOTCONN;
+               goto out;
+       }
+       m_session = (struct xsmp_session_msg *)(m_header + 1);
+
+       m_header->type = XSMP_MESSAGE_TYPE_SESSION;
+       len = m_header->length = sizeof(*m_header) + sizeof(*m_session);
+
+       m_header->source_id.node_id_primary = ctxp->port->guid;
+       m_header->source_id.node_id_aux = 0;
+       m_header->dest_id.node_id_primary = ctxp->dguid;
+       m_header->dest_id.node_id_aux = 0;
+       m_header->seq_number = ctxp->seq_number++;
+
+       m_session->type = type;
+       m_session->length = sizeof(*m_session);
+       m_session->resource_flags = rflags | RESOURCE_OS_TYPE_LINUX;
+       m_session->version = XSIGO_LINUX_DRIVER_VERSION;
+       m_session->chassis_version = MINIMUM_XSIGOS_VERSION;
+       m_session->boot_flags = boot_flag;
+       m_session->fw_ver = ctxp->port->xs_dev->fw_ver;
+       m_session->hw_ver = ctxp->port->xs_dev->hw_ver;
+       m_session->vendor_part_id = ctxp->port->xs_dev->vendor_part_id;
+
+       change_header_byte_order(m_header);
+       change_session_byte_order(m_session);
+       ret =
+           xscore_post_send(&ctxp->conn_ctx, m_header, len,
+                            XSCORE_DEFER_PROCESS);
+       ctxp->counters[XSMP_TOTAL_MSG_SENT_COUNTER]++;
+       ctxp->counters[XSMP_SESSION_MESSAGE_SENT_COUNTER]++;
+       if (ret) {
+               if (ret == -ENOBUFS)
+                       ctxp->counters[XSMP_SESSION_RING_FULL_COUNTER]++;
+               else
+                       ctxp->counters[XSMP_SESSION_SEND_ERROR_COUNTER]++;
+       }
+out:
+       spin_unlock_irqrestore(&ctxp->lock, flags);
+       if (ret)
+               kfree(m_header);
+       return ret;
+}
+
+static int xsmp_send_register_msg(struct xsmp_ctx *ctx, u32 rflags)
+{
+       return send_xsmp_sess_msg(ctx, XSMP_SESSION_REGISTER, rflags);
+}
+
+static int xsmp_send_hello_msg(struct xsmp_ctx *ctx)
+{
+       return send_xsmp_sess_msg(ctx, XSMP_SESSION_HELLO, 0);
+}
+
+int xsmp_send_resource_list(struct xsmp_ctx *ctx, u32 rflags)
+{
+       return send_xsmp_sess_msg(ctx, XSMP_SESSION_RESOURCE_LIST, rflags);
+}
+
+int xsmp_send_shutdown(struct xsmp_ctx *ctx)
+{
+       return send_xsmp_sess_msg(ctx, XSMP_SESSION_SHUTDOWN, 0);
+}
+
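+/*
+ * Note the field overloading in REG_CONFIRM: 'version' carries the
+ * hello interval (in seconds) and 'resource_flags' carries the
+ * datapath timeout.
+ */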
+static void handle_reg_confirm_msg(struct xsmp_ctx *ctx,
+                                  struct xsmp_session_msg *m_session)
+{
+       int hello_interval = m_session->version;
+       int datapath_timeout = m_session->resource_flags;
+
+       XSMP_INFO("Rcvd XSMP_SESSION_REG_CONFIRM from 0x%llx\n", ctx->dguid);
+       set_bit(XSMP_REG_CONFIRM_RCVD, &ctx->flags);
+       ctx->counters[XSMP_REG_CONF_COUNTER]++;
+       ctx->state = XSMP_SESSION_CONNECTED;
+       ctx->hello_timeout = msecs_to_jiffies(hello_interval * 3 * 1000);
+
+       if (datapath_timeout != -1)
+               ctx->datapath_timeout = (hello_interval * 3) * 2;
+       else
+               ctx->datapath_timeout = -1;
+
+       ctx->xsigo_xsmp_version = ntohl(m_session->xsigo_xsmp_version);
+       memcpy(ctx->chassis_name, m_session->chassis_name, CHASSIS_NAME_LEN);
+       ctx->chassis_name[CHASSIS_NAME_LEN - 1] = '\0';
+       memcpy(ctx->session_name, m_session->session_name, SESSION_NAME_LEN);
+       ctx->session_name[SESSION_NAME_LEN - 1] = '\0';
+       XSMP_PRINT("Established XSMP session (%s) to chassis (%s)\n",
+                  ctx->session_name, ctx->chassis_name);
+}
+
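+/*
+ * Check the receive sequence number against the expected value.  A
+ * mismatch is only counted and logged; the message is still processed
+ * and the expected sequence number always advances.
+ */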
+static int is_seq_number_ok(struct xsmp_ctx *ctx,
+                           struct xsmp_message_header *hdr)
+{
+       int ok = 1;
+
+       if (ctx->rcv_seq_number != be32_to_cpu(hdr->seq_number)) {
+               XSMP_INFO("XSMP Session 0x%llx seq number mismatch: exp: 0x%x, actual: 0x%x\n",
+                         ctx->dguid, ctx->rcv_seq_number,
+                         be32_to_cpu(hdr->seq_number));
+               ctx->counters[XSMP_SEQ_MISMATCH_COUNTER]++;
+               ok = 0;
+       }
+       ctx->rcv_seq_number++;
+       return ok;
+}
+
+static void handle_hello_msg(struct xsmp_ctx *ctx,
+                            struct xsmp_message_header *hdr)
+{
+       XSMP_INFO("Rcvd XSMP_SESSION_HELLO from 0x%llx\n", ctx->dguid);
+       ctx->hello_jiffies = jiffies;
+       if (xsmp_send_hello_msg(ctx)) {
+               /*
+                * Mark connection as bad and reconnect
+                */
+       } else {
+               ctx->counters[XSMP_HELLO_SENT_COUNTER]++;
+       }
+}
+
+static int xsmp_process_xsmp_session_type(struct xsmp_ctx *ctx, void *msg,
+                                         int length)
+{
+       struct xsmp_message_header *m_header = msg;
+       struct xsmp_session_msg *m_session =
+           (struct xsmp_session_msg *)(m_header + 1);
+
+       XSMP_FUNCTION("%s: Processing message from GUID: %llx\n",
+                     __func__, ctx->dguid);
+
+       if (length < sizeof(*m_header)) {
+               kfree(msg);
+               return -EINVAL;
+       }
+       change_header_byte_order(m_header);
+       if (length > m_header->length) {
+               kfree(msg);
+               return -EINVAL;
+       }
+       change_session_byte_order(m_session);
+
+       switch (m_session->type) {
+       case XSMP_SESSION_REG_CONFIRM:
+               /* handle_reg_confirm_msg() also sets XSMP_REG_CONFIRM_RCVD */
+               handle_reg_confirm_msg(ctx, m_session);
+               break;
+       case XSMP_SESSION_HELLO:
+               ctx->counters[XSMP_HELLO_RCVD_COUNTER]++;
+               handle_hello_msg(ctx, m_header);
+               break;
+       case XSMP_SESSION_REG_REJECT:
+               ctx->counters[XSMP_REJ_RCVD_COUNTER]++;
+               set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+               XSMP_PRINT("XSMP REJECT received for session %s : %s (0x%llx)\n",
+                          ctx->session_name, ctx->chassis_name, ctx->dguid);
+               break;
+       case XSMP_SESSION_SHUTDOWN:
+               ctx->counters[XSMP_SHUTDOWN_RCVD_COUNTER]++;
+               XSMP_PRINT("XSMP shutdown received for session %s : %s (0x%llx)\n",
+                          ctx->session_name, ctx->chassis_name, ctx->dguid);
+               set_bit(XSMP_SHUTTINGDOWN_BIT, &ctx->flags);
+               break;
+       default:
+               break;
+       }
+       kfree(msg);
+       return 0;
+}
+
+static void xsmp_cleanup_session(struct xsmp_ctx *xsmp_ctx)
+{
+       unsigned long flags, flags1;
+       /*
+        * Now delete the entry from the list & idr
+        */
+       XSMP_FUNCTION("%s: Cleaning up 0x%llx\n", __func__, xsmp_ctx->dguid);
+       xcpm_xsmp_remove_proc_entry(xsmp_ctx);
+       spin_lock_irqsave(&xsmp_glob_lock, flags);
+       idr_remove(&xsmp_id_table, xsmp_ctx->idr);
+       xsmp_ctx->idr = -1;
+       spin_lock_irqsave(&xsmp_ctx->lock, flags1);
+       set_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags);
+       spin_unlock_irqrestore(&xsmp_ctx->lock, flags1);
+       spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+       /*
+        * Now disconnect and cleanup connection
+        */
+       (void)xsmp_sess_disconnect(xsmp_ctx);
+
+       if (cancel_delayed_work(&xsmp_ctx->sm_work))
+               xsmp_put_ctx(xsmp_ctx);
+       /*
+        * Wait for the reference count to drop to zero
+        */
+       while (atomic_read(&xsmp_ctx->ref_cnt))
+               msleep(100);
+
+       xscore_conn_destroy(&xsmp_ctx->conn_ctx);
+       spin_lock_irqsave(&xsmp_glob_lock, flags);
+       list_del(&xsmp_ctx->list);
+       list_del(&xsmp_ctx->glist);
+       spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+       kfree(xsmp_ctx);
+}
+
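+/*
+ * Returns 0 for session HELLO messages (which may be handled inline in
+ * interrupt context) and 1 for everything else.
+ */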
+static int xsmp_check_msg_type(struct xsmp_ctx *xsmp_ctx, void *msg)
+{
+       struct xsmp_session_msg *m_session = NULL;
+       struct xsmp_message_header *m_header =
+           (struct xsmp_message_header *)msg;
+       int ret = 1;
+
+       switch (m_header->type) {
+       case XSMP_MESSAGE_TYPE_SESSION:
+               m_session = (struct xsmp_session_msg *)(m_header + 1);
+               if (m_session->type == XSMP_SESSION_HELLO)
+                       ret = 0;
+               break;
+       default:
+               break;
+
+       }
+       return ret;
+}
+
+/*
+ * Executes in workq/thread context.
+ * XXX: could potentially use the idr here.
+ */
+static void xsmp_process_recv_msgs(struct work_struct *work)
+{
+       struct xsmp_work *xwork = container_of(work, struct xsmp_work,
+                                              work);
+       struct xsmp_message_header *m_header = xwork->msg;
+       struct xsmp_ctx *xsmp_ctx = xwork->xsmp_ctx;
+       int sendup = 0;
+
+       xscore_set_wq_state(XSCORE_WQ_XSMP_PROC_MSG);
+       is_seq_number_ok(xsmp_ctx, m_header);
+
+       switch (m_header->type) {
+       case XSMP_MESSAGE_TYPE_VNIC:
+               xsmp_ctx->counters[XSMP_VNIC_MESSAGE_COUNTER]++;
+               sendup++;
+               break;
+       case XSMP_MESSAGE_TYPE_VHBA:
+               xsmp_ctx->counters[XSMP_VHBA_MESSAGE_COUNTER]++;
+               sendup++;
+               break;
+       case XSMP_MESSAGE_TYPE_USPACE:
+               xsmp_ctx->counters[XSMP_USPACE_MESSAGE_COUNTER]++;
+               sendup++;
+               break;
+       case XSMP_MESSAGE_TYPE_XVE:
+               xsmp_ctx->counters[XSMP_XVE_MESSAGE_COUNTER]++;
+               sendup++;
+               break;
+       case XSMP_MESSAGE_TYPE_SESSION:
+               xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]++;
+               xsmp_process_xsmp_session_type(xwork->xsmp_ctx, xwork->msg,
+                                              xwork->len);
+               break;
+       default:
+               kfree(xwork->msg);
+               XSMP_ERROR("%s: Unknown message type: %d\n", __func__,
+                          m_header->type);
+               break;
+       }
+       if (sendup) {
+               if (xcpm_send_msg_client
+                   (xsmp_ctx, m_header->type, xwork->msg, xwork->len))
+                       kfree(xwork->msg);
+       }
+       kfree(xwork);
+       xsmp_put_ctx(xsmp_ctx);
+       xscore_clear_wq_state(XSCORE_WQ_XSMP_PROC_MSG);
+}
+
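+/*
+ * Queue (delayed) state machine work unless the session is shutting
+ * down.  Takes a context reference which the work handler drops.
+ */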
+static void queue_sm_work(struct xsmp_ctx *xsmp_ctx, int msecs)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&xsmp_ctx->lock, flags);
+       if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags)) {
+               atomic_inc(&xsmp_ctx->ref_cnt);
+               queue_delayed_work(xsmp_ctx->wq, &xsmp_ctx->sm_work,
+                                  msecs_to_jiffies(msecs));
+       } else
+               set_bit(XSMP_DELETE_BIT, &xsmp_ctx->flags);
+       spin_unlock_irqrestore(&xsmp_ctx->lock, flags);
+}
+
+static int xsmp_sess_disconnect(struct xsmp_ctx *xsmp_ctx)
+{
+       xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+       (void)xscore_conn_disconnect(&xsmp_ctx->conn_ctx, 0);
+       return 0;
+}
+
+static int xsmp_sess_connect(struct xsmp_ctx *xsmp_ctx)
+{
+       int ret = 0;
+
+       switch (xsmp_ctx->state) {
+       case XSMP_SESSION_ERROR:
+       case XSMP_SESSION_INIT:
+       case XSMP_SESSION_DISCONNECTED:
+               xsmp_ctx->counters[XSMP_CONN_RETRY_COUNTER]++;
+               xsmp_ctx->rcv_seq_number = 1;
+               xsmp_ctx->seq_number = 1;
+               xsmp_ctx->jiffies = jiffies;
+               xsmp_ctx->state = XSMP_SESSION_TPT_CONNECTING;
+               clear_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+               clear_bit(XSMP_REG_CONFIRM_RCVD, &xsmp_ctx->flags);
+               XSMP_INFO("%s: Session to 0x%llx, Trying\n", __func__,
+                         xsmp_ctx->dguid);
+               ret = xscore_conn_connect(&xsmp_ctx->conn_ctx,
+                                         XSCORE_SYNCHRONOUS);
+               if (ret) {
+                       xsmp_ctx->counters[XSMP_CONN_FAILED_COUNTER]++;
+                       XSMP_INFO("%s: Session %s:%s to 0x%llx Failed ret %d\n",
+                                 __func__, xsmp_ctx->session_name,
+                                 xsmp_ctx->chassis_name, xsmp_ctx->dguid, ret);
+                       ret = -ENOTCONN;
+               } else {
+                       XSMP_INFO("%s: Session to 0x%llx successful\n",
+                                 __func__, xsmp_ctx->dguid);
+                       xsmp_ctx->counters[XSMP_CONN_SUCCESS_COUNTER]++;
+                       xsmp_ctx->jiffies = jiffies;
+                       xsmp_ctx->hello_jiffies = jiffies;
+                       xsmp_ctx->state = XSMP_SESSION_CONNECTING;
+                       if (xsmp_send_register_msg
+                           (xsmp_ctx, xcpm_resource_flags)) {
+                               XSMP_ERROR("REGISTER_MESSAGE to GUID:0x%llx failed\n",
+                                          xsmp_ctx->dguid);
+                       } else {
+                               set_bit(XSMP_REG_SENT, &xsmp_ctx->flags);
+                               xsmp_ctx->counters[XSMP_REG_SENT_COUNTER]++;
+                       }
+               }
+               break;
+       default:
+               XSMP_ERROR("%s:Connect called in wrong state, %d\n",
+                          __func__, xsmp_ctx->state);
+               break;
+       }
+       return ret;
+}
+
+static void xsmp_state_machine(struct xsmp_ctx *xsmp_ctx)
+{
+       if (xsmp_ctx->state == XSMP_SESSION_CONNECTED ||
+           xsmp_ctx->state == XSMP_SESSION_CONNECTING) {
+               xsmp_ctx->sm_delay = 10000;
+               /*
+                * Check hello time stamp
+                */
+               if (!boot_flag
+                   && (((long)jiffies - (long)xsmp_ctx->hello_jiffies) >
+                       (long)xsmp_ctx->hello_timeout)) {
+                       /*
+                        * Reconnect
+                        */
+                       XSMP_PRINT("XSMP: Hello timeout for session (%s) expired..Reconnecting %s\n",
+                                  xsmp_ctx->session_name,
+                                  xsmp_ctx->chassis_name);
+
+                       xsmp_ctx->counters[XSMP_SESSION_TIMEOUT_COUNTER]++;
+               } else
+                       return;
+       }
+       xsmp_ctx->sm_delay = 2000;
+       (void)xsmp_sess_disconnect(xsmp_ctx);
+       if (!test_bit(XSMP_IBLINK_DOWN, &xsmp_ctx->flags))
+               xsmp_sess_connect(xsmp_ctx);
+}
+
+static void xsmp_state_machine_work(struct work_struct *work)
+{
+       struct xsmp_ctx *xsmp_ctx = container_of(work, struct xsmp_ctx,
+                                                sm_work.work);
+       xscore_set_wq_state(XSCORE_DWQ_SM_WORK);
+       if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags))
+               xsmp_state_machine(xsmp_ctx);
+       queue_sm_work(xsmp_ctx, xsmp_ctx->sm_delay);
+       xsmp_put_ctx(xsmp_ctx);
+       xscore_clear_wq_state(XSCORE_DWQ_SM_WORK);
+}
+
+/*
+ * Called from interrupt context
+ */
+void xsmp_send_handler(void *client_arg, void *msg, int status, int n)
+{
+       struct xsmp_ctx *xsmp_ctx = client_arg;
+
+       XSMP_INFO("%s: Status %d, GUID: 0x%llx\n", __func__, status,
+                 xsmp_ctx->dguid);
+       if (status) {
+               XSMP_ERROR
+                   ("XSMP: %s:%s Send Completion error: 0x%llx, status %d\n",
+                    xsmp_ctx->session_name, xsmp_ctx->chassis_name,
+                    xsmp_ctx->dguid, status);
+               xsmp_ctx->state = XSMP_SESSION_ERROR;
+       }
+       kfree(msg);
+}
+
+/*
+ * Called from interrupt context
+ */
+void xsmp_recv_handler(void *client_arg, void *msg, int sz, int status, int n)
+{
+       struct xsmp_ctx *xsmp_ctx = client_arg;
+       struct xsmp_work *work;
+       unsigned long flags;
+
+       if (status) {
+               /*
+                * XXX mark connection as bad and let it reconnect
+                * (the hello timer will kick in)
+                */
+               XSMP_ERROR
+                   ("XSMP: %s:%s Recv Completion error: 0x%llx, status %d\n",
+                    xsmp_ctx->session_name, xsmp_ctx->chassis_name,
+                    xsmp_ctx->dguid, status);
+               xsmp_ctx->state = XSMP_SESSION_ERROR;
+               kfree(msg);
+               return;
+       }
+       if (xscore_handle_hello_msg && !xsmp_check_msg_type(xsmp_ctx, msg)) {
+               xsmp_ctx->counters[XSMP_SESSION_MESSAGE_COUNTER]++;
+               xsmp_ctx->counters[XSMP_HELLO_INTERRUPT_COUNTER]++;
+               xsmp_process_xsmp_session_type(xsmp_ctx, msg, sz);
+               return;
+       }
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work) {
+               kfree(msg);
+               return;
+       }
+       INIT_WORK(&work->work, xsmp_process_recv_msgs);
+       work->xsmp_ctx = xsmp_ctx;
+       work->msg = msg;
+       work->len = sz;
+       work->status = status;
+
+       spin_lock_irqsave(&xsmp_ctx->lock, flags);
+       if (!test_bit(XSMP_SHUTTINGDOWN_BIT, &xsmp_ctx->flags)) {
+               atomic_inc(&xsmp_ctx->ref_cnt);
+               queue_work(xsmp_ctx->wq, &work->work);
+       } else {
+               kfree(msg);
+               kfree(work);
+       }
+       spin_unlock_irqrestore(&xsmp_ctx->lock, flags);
+}
+
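+/*
+ * Fan an event out to all registered service event handlers.  The
+ * service mutex is dropped around each callback so handlers may block;
+ * the per-service reference count keeps the entry valid meanwhile.
+ */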
+static void notify_ulp(struct xsmp_ctx *ctx, int evt)
+{
+       int i;
+       struct xsmp_service_reg_info *sp;
+
+       mutex_lock(&svc_mutex);
+       for (i = 1; i < MAX_NUM_SVCS; i++) {
+               sp = &xcpm_services[i];
+               if (sp->svc_state == SVC_STATE_UP && sp->event_handler) {
+                       atomic_inc(&sp->ref_cnt);
+                       mutex_unlock(&svc_mutex);
+                       sp->event_handler((void *) (unsigned long)
+                                         ctx->idr, evt);
+                       atomic_dec(&sp->ref_cnt);
+                       mutex_lock(&svc_mutex);
+               }
+       }
+       mutex_unlock(&svc_mutex);
+}
+
+/*
+ * Called from CM thread context; if you need delayed
+ * processing, post to the local thread.
+ */
+void xsmp_event_handler(void *client_arg, int event)
+{
+       struct xsmp_ctx *xsmp_ctx = client_arg;
+
+       switch (event) {
+       case XSCORE_CONN_CONNECTED:
+               XSMP_INFO("XSCORE_CONN_CONNECTED: GUID: 0x%llx\n",
+                         xsmp_ctx->dguid);
+               break;
+       case XSCORE_CONN_ERR:
+               xsmp_ctx->state = XSMP_SESSION_ERROR;
+               XSMP_INFO("XSCORE_CONN_ERR: GUID: 0x%llx\n", xsmp_ctx->dguid);
+               break;
+       case XSCORE_CONN_RDISCONNECTED:
+               xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+               XSMP_INFO("XSCORE_CONN_RDISCONNECTED: GUID: 0x%llx\n",
+                         xsmp_ctx->dguid);
+               break;
+       case XSCORE_CONN_LDISCONNECTED:
+               xsmp_ctx->state = XSMP_SESSION_DISCONNECTED;
+               XSMP_INFO("XSCORE_CONN_LDISCONNECTED: GUID: 0x%llx\n",
+                         xsmp_ctx->dguid);
+               break;
+       default:
+               break;
+       }
+       notify_ulp(xsmp_ctx, event);
+}
+
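+/*
+ * Private data carried in the connection request; is_checksum is set
+ * when the device is a soft HCA (is_shca) and the shca_csum flag is
+ * enabled.
+ */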
+struct xsmp_private_data {
+       u8 is_checksum;
+       u32 reserved[6];
+} __packed;
+
+int xsmp_session_create(struct xscore_port *port, u64 dguid, u16 dlid)
+{
+       struct xsmp_ctx *xsmp_ctx;
+       unsigned long flags;
+       static int next_id = 1;
+       int ret;
+       struct xscore_conn_ctx *cctx;
+       struct xsmp_private_data *cmp;
+
+       XSMP_FUNCTION("%s: dguid: 0x%llx, dlid: 0x%x\n", __func__, dguid, dlid);
+
+       xsmp_ctx = kzalloc(sizeof(*xsmp_ctx), GFP_ATOMIC);
+       if (!xsmp_ctx)
+               return -ENOMEM;
+       spin_lock_init(&xsmp_ctx->lock);
+
+       cctx = &xsmp_ctx->conn_ctx;
+       memset(cctx, 0, sizeof(*cctx));
+       cctx->tx_ring_size = xsmp_ring_size;
+       cctx->rx_ring_size = xsmp_ring_size;
+       cctx->rx_buf_size = MAX_XSMP_MSG_SIZE;
+       cctx->client_arg = xsmp_ctx;
+       cctx->event_handler = xsmp_event_handler;
+       cctx->send_compl_handler = xsmp_send_handler;
+       cctx->recv_msg_handler = xsmp_recv_handler;
+       cctx->dguid = dguid;
+       cctx->dlid = dlid;
+       cctx->service_id = be64_to_cpu(XSMP_SERVICE_ID);
+
+       cmp = (struct xsmp_private_data *)cctx->priv_data;
+       cctx->priv_data_len = sizeof(*cmp);
+       if (port->xs_dev->is_shca && shca_csum) {
+               cmp->is_checksum = 1;
+               cctx->features |= XSCORE_USE_CHECKSUM;
+       } else {
+               cmp->is_checksum = 0;
+               cctx->features &= ~XSCORE_USE_CHECKSUM;
+       }
+
+       ret = xscore_conn_init(&xsmp_ctx->conn_ctx, port);
+       if (ret) {
+               XSMP_ERROR("xscore_conn_init error %d\n", ret);
+               kfree(xsmp_ctx);
+               return ret;
+       }
+       xsmp_ctx->state = XSMP_SESSION_INIT;
+       xsmp_ctx->dguid = dguid;
+       xsmp_ctx->dlid = dlid;
+       xsmp_ctx->port = port;
+       xsmp_ctx->wq = port->port_wq;
+       xsmp_ctx->hello_timeout = msecs_to_jiffies(60 * 1000);
+
+       spin_lock_irqsave(&xsmp_glob_lock, flags);
+       /* GFP_ATOMIC: xsmp_glob_lock is held with interrupts disabled */
+       xsmp_ctx->idr = idr_alloc(&xsmp_id_table, xsmp_ctx, next_id++,
+                       0, GFP_ATOMIC);
+       spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+       if (xsmp_ctx->idr < 0) {
+               XSMP_ERROR("%s: dguid: 0x%llx, idr_alloc failed, err %d\n",
+                          __func__, dguid, xsmp_ctx->idr);
+               ret = xsmp_ctx->idr;
+               xscore_conn_destroy(&xsmp_ctx->conn_ctx);
+               kfree(xsmp_ctx);
+               return ret;
+       }
+
+       INIT_DELAYED_WORK(&xsmp_ctx->sm_work, xsmp_state_machine_work);
+       spin_lock_irqsave(&xsmp_glob_lock, flags);
+       list_add_tail(&xsmp_ctx->list, &port->xsmp_list);
+       list_add_tail(&xsmp_ctx->glist, &gxsmp_list);
+       spin_unlock_irqrestore(&xsmp_glob_lock, flags);
+       xcpm_xsmp_add_proc_entry(xsmp_ctx);
+       xsmp_ctx->sm_delay = 1000;
+       queue_sm_work(xsmp_ctx, 0);
+       return 0;
+}
+
+void xsmp_module_init(void)
+{
+       spin_lock_init(&xsmp_glob_lock);
+       mutex_init(&svc_mutex);
+       mutex_init(&xsmp_mutex);
+       idr_init(&xsmp_id_table);
+       INIT_LIST_HEAD(&gxsmp_list);
+}
+
+void xsmp_module_destroy(void)
+{
+       idr_destroy(&xsmp_id_table);
+       mutex_destroy(&svc_mutex);
+       mutex_destroy(&xsmp_mutex);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsmp.h b/drivers/infiniband/ulp/xsigo/xscore/xsmp.h
new file mode 100644 (file)
index 0000000..bf931f4
--- /dev/null
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _XSMP_H_
+#define _XSMP_H_
+
+enum {
+       XSMP_REG_SENT_COUNTER,
+       XSMP_REG_CONF_COUNTER,
+       XSMP_RES_LIST_COUNTER,
+       XSMP_HELLO_RCVD_COUNTER,
+       XSMP_HELLO_INTERRUPT_COUNTER,
+       XSMP_REJ_RCVD_COUNTER,
+       XSMP_HELLO_SENT_COUNTER,
+       XSMP_SEQ_MISMATCH_COUNTER,
+       XSMP_SESSION_TIMEOUT_COUNTER,
+       XSMP_SHUTDOWN_RCVD_COUNTER,
+       XSMP_SHUTDOWN_SENT_COUNTER,
+       XSMP_VNIC_MESSAGE_COUNTER,
+       XSMP_VHBA_MESSAGE_COUNTER,
+       XSMP_USPACE_MESSAGE_COUNTER,
+       XSMP_XVE_MESSAGE_COUNTER,
+       XSMP_SESSION_MESSAGE_COUNTER,
+       XSMP_VNIC_MESSAGE_SENT_COUNTER,
+       XSMP_VHBA_MESSAGE_SENT_COUNTER,
+       XSMP_USPACE_MESSAGE_SENT_COUNTER,
+       XSMP_XVE_MESSAGE_SENT_COUNTER,
+       XSMP_SESSION_MESSAGE_SENT_COUNTER,
+       XSMP_SESSION_RING_FULL_COUNTER,
+       XSMP_SESSION_SEND_ERROR_COUNTER,
+       XSMP_SESSION_CONN_DOWN_COUNTER,
+       XSMP_TOTAL_MSG_SENT_COUNTER,
+       XSMP_CONN_RETRY_COUNTER,
+       XSMP_CONN_FAILED_COUNTER,
+       XSMP_CONN_SUCCESS_COUNTER,
+       XSMP_MAX_COUNTERS
+};
+
+enum {
+       XSMP_SESSION_ERROR,
+       XSMP_SESSION_INIT,
+       XSMP_SESSION_TPT_CONNECTING,
+       XSMP_SESSION_TPT_CONNECTED,
+       XSMP_SESSION_CONNECTING,
+       XSMP_SESSION_CONNECTED,
+       XSMP_SESSION_DISCONNECTING,
+       XSMP_SESSION_DISCONNECTED,
+};
+
+struct xsmp_ctx {
+       spinlock_t lock;
+       int state;
+       atomic_t ref_cnt;
+       unsigned long flags;
+#define        XSMP_DELETE_BIT         1
+#define        XSMP_SHUTTINGDOWN_BIT   2
+#define        XSMP_REG_SENT           3
+#define        XSMP_REG_CONFIRM_RCVD   4
+#define        XSMP_IBLINK_DOWN        5
+       struct list_head list;
+       struct list_head glist;
+       int idr;
+       unsigned long jiffies;
+       unsigned long hello_jiffies;
+       struct xscore_port *port;
+       struct xscore_conn_ctx conn_ctx;
+       u64 dguid;
+       u16 dlid;
+       struct delayed_work sm_work;
+       int sm_delay;
+       int hello_timeout;
+       struct workqueue_struct *wq;
+       int seq_number;
+       u32 counters[XSMP_MAX_COUNTERS];
+       u32 rcv_seq_number;
+       u32 xsigo_xsmp_version;
+       int datapath_timeout;
+       char chassis_name[CHASSIS_NAME_LEN];
+       char session_name[SESSION_NAME_LEN];
+};
+
+extern void xcpm_xsmp_add_proc_entry(struct xsmp_ctx *xsmp_ctx);
+extern void xcpm_xsmp_remove_proc_entry(struct xsmp_ctx *xsmp_ctx);
+#endif
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsmp_common.h b/drivers/infiniband/ulp/xsigo/xscore/xsmp_common.h
new file mode 100644 (file)
index 0000000..d5043a3
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSMP_COMMON_H__
+#define __XSMP_COMMON_H__
+
+/*
+ *     Node ID: A 96-bit identifier of the initiating node
+ *     The lower part is the 'guid'
+ */
+struct xsmp_node_id {
+       u32 node_id_aux;
+       u64 node_id_primary;
+} __packed;
+
+/*
+ *     The XSMP message header
+ *
+ *     The message header precedes all XSMP messages from either
+ *     the XCM or the server.
+ *     'message_type' identifies the class of the message.
+ *     'seq_number' is a serially incrementing count (different
+ *     for each direction) used to track the order of messages.
+ *
+ *     This is followed by a series of message objects (of the same
+ *     class) adding up to the 'length' field of the header.
+ */
+struct xsmp_message_header {
+       u8 type;
+       u8 code;
+       u16 length;
+       u32 seq_number;
+       struct xsmp_node_id source_id;
+       struct xsmp_node_id dest_id;
+} __packed;
+
+#define XSMP_MESSAGE_TYPE_SESSION      1
+#define XSMP_MESSAGE_TYPE_VNIC         2
+#define XSMP_MESSAGE_TYPE_VHBA         3
+#define XSMP_MESSAGE_TYPE_VSSL         4
+#define XSMP_MESSAGE_TYPE_USPACE       5
+#define XSMP_MESSAGE_TYPE_XVE          6
+
+#define XSMP_MESSAGE_TYPE_MAX          8
+
+enum xscore_cap_flags {
+       RESOURCE_FLAG_INDEX_VNIC = 0,
+       RESOURCE_FLAG_INDEX_VHBA = 1,
+       RESOURCE_FLAG_INDEX_VSSL = 2,
+       RESOURCE_FLAG_INDEX_USPACE = 3,
+       RESOURCE_FLAG_INDEX_NO_HA = 4,
+       RESOURCE_FLAG_INDEX_XVE = 6,
+       RESOURCE_FLAG_INDEX_MAX
+};
+
+#define RESOURCE_VNIC  (1 << RESOURCE_FLAG_INDEX_VNIC)
+#define RESOURCE_VHBA  (1 << RESOURCE_FLAG_INDEX_VHBA)
+#define RESOURCE_VSSL  (1 << RESOURCE_FLAG_INDEX_VSSL)
+#define RESOURCE_USPACE        (1 << RESOURCE_FLAG_INDEX_USPACE)
+#define RESOURCE_NO_HA (1 << RESOURCE_FLAG_INDEX_NO_HA)
+
+#endif /* __XSMP_COMMON_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsmp_session.h b/drivers/infiniband/ulp/xsigo/xscore/xsmp_session.h
new file mode 100644 (file)
index 0000000..68cad97
--- /dev/null
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSMP_SESSION_H__
+#define __XSMP_SESSION_H__
+
+#include "xsmp_common.h"
+
+/* Session management messages */
+
+/* Session message types */
+enum xsmp_session_cmd_type {
+       XSMP_SESSION_UNUSED = 0,
+
+       /* Heartbeat between the server and XCM */
+       XSMP_SESSION_HELLO,
+
+       /*
+        * Used by the server while initiating a connection to an XCM
+        * 'resource_flags' specify which services are already active
+        */
+       XSMP_SESSION_REGISTER,
+
+       /* Positive reply from XCM in response to a register from server */
+       XSMP_SESSION_REG_CONFIRM,
+
+       /*
+        * Negative reply from XCM in response to a register from server
+        * 'reason_code' specifies the reason for the reject
+        */
+       XSMP_SESSION_REG_REJECT,
+
+       /* Session shutdown message: initiated by either server or XCM */
+       XSMP_SESSION_SHUTDOWN,
+
+       /* List of services that are active: sent by server to XCM */
+       XSMP_SESSION_RESOURCE_LIST,
+
+       /* Set of error counts sent by server to XCM */
+       XSMP_SESSION_ERROR_STATS,
+
+       /*
+        * Secondary timeout value specified by XCM
+        * after which the datapaths are aborted
+        */
+       XSMP_SESSION_STALE_TIME,
+};
+
+#define CHASSIS_NAME_LEN    32
+#define SESSION_NAME_LEN    32
+struct xsmp_session_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 resource_flags;
+                       u32 version;    /* current driver version */
+                       u32 chassis_version;    /* chassis sw version
+                                                * this driver can work with */
+                       u32 boot_flags;
+                       u64 fw_ver;
+                       u32 hw_ver;
+                       u32 vendor_part_id;
+                       u32 xsigo_xsmp_version;
+                       char chassis_name[CHASSIS_NAME_LEN];
+                       char session_name[SESSION_NAME_LEN];
+               } __packed;
+               u8 bytes[224];
+       };
+} __packed;
+
+enum {
+       RESOURCE_OS_TYPE_LINUX = 0x01000000,
+       RESOURCE_OS_TYPE_VMWARE = 0x02000000,
+       RESOURCE_MS_CLIENT = 0x80000000,
+};
+
+#endif /* __XSMP_SESSION_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/Kconfig b/drivers/infiniband/ulp/xsigo/xsvhba/Kconfig
new file mode 100644 (file)
index 0000000..510c441
--- /dev/null
@@ -0,0 +1,16 @@
+config INFINIBAND_XSVHBA
+        tristate "Xsigo Virtual HBA"
+        depends on INFINIBAND_XSCORE
+        ---help---
+         Support for the Xsigo virtual HBA, allowing SAN
+         connectivity.
+
+config VHBA_DEBUG
+        bool "Xsigo Virtual HBA debugging" if EMBEDDED
+        depends on INFINIBAND_XSVHBA
+        default n
+        ---help---
+          This option causes debugging code to be compiled into the
+          Xsigo xsvhba driver.  The output can be turned on via the
+          vhba_debug_level module parameter (which can also be set
+          after the driver is loaded, through sysfs).
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/Makefile b/drivers/infiniband/ulp/xsigo/xsvhba/Makefile
new file mode 100644 (file)
index 0000000..bf12208
--- /dev/null
@@ -0,0 +1,11 @@
+obj-$(CONFIG_INFINIBAND_XSVHBA) := xsvhba.o
+xsvhba-y := vhba_main.o vhba_xsmp.o vhba_create.o vhba_init.o vhba_delete.o \
+           vhba_attr.o vhba_wq.o vhba_proc.o vhba_stats.o vhba_ib.o        \
+           vhba_scsi_intf.o vhba_align.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.c
new file mode 100644 (file)
index 0000000..2cf8055
--- /dev/null
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+ /* This file houses the logic to align cmd->request_buffer sg lists
+  * so that they work with the IB FMR frames.
+  * Bugs: this code uses PAGE_SIZE as the HCA page size, which is a
+  * horribly incorrect assumption.
+  */
+#include <linux/version.h>
+#include <linux/blkdev.h>
+
+#include <linux/highmem.h>
+
+#include "xs_compat.h"
+#include "vhba_align.h"
+#include "vhba_os_def.h"
+
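+/*
+ * Copy between a scatterlist and a linear buffer, one page at a time;
+ * 'to_buffer' selects the direction.  Local equivalent of the generic
+ * sg_copy_buffer() helper in lib/scatterlist.c.
+ */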
+static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
+                            void *buf, size_t buflen, int to_buffer)
+{
+       struct scatterlist *sg;
+       size_t buf_off = 0;
+       int i;
+
+       for (i = 0, sg = sgl; i < nents; i++, sg++) {
+               struct page *page;
+               int n = 0;
+               unsigned int sg_off = sg->offset;
+               unsigned int sg_copy = sg->length;
+
+               if (sg_copy > buflen)
+                       sg_copy = buflen;
+               buflen -= sg_copy;
+
+               while (sg_copy > 0) {
+                       unsigned int page_copy;
+                       void *p;
+
+                       page_copy = PAGE_SIZE - sg_off;
+                       if (page_copy > sg_copy)
+                               page_copy = sg_copy;
+
+                       page = nth_page(sg_page(sg), n);
+                       p = kmap_atomic(page);
+
+                       if (to_buffer)
+                               memcpy(buf + buf_off, p + sg_off, page_copy);
+                       else {
+                               memcpy(p + sg_off, buf + buf_off, page_copy);
+                               flush_kernel_dcache_page(page);
+                       }
+
+                       kunmap_atomic(p);
+
+                       buf_off += page_copy;
+                       sg_off += page_copy;
+                       if (sg_off == PAGE_SIZE) {
+                               sg_off = 0;
+                               n++;
+                       }
+                       sg_copy -= page_copy;
+               }
+
+               if (!buflen)
+                       break;
+       }
+
+       return buf_off;
+}
+
+size_t sg_copy_from_buffer(struct scatterlist *sgl, unsigned int nents,
+                          void *buf, size_t buflen)
+{
+       return sg_copy_buffer(sgl, nents, buf, buflen, 0);
+}
+
+size_t sg_copy_to_buffer(struct scatterlist *sgl, unsigned int nents,
+                        void *buf, size_t buflen)
+{
+       return sg_copy_buffer(sgl, nents, buf, buflen, 1);
+}
+
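+/*
+ * Swap an unaligned sg list for a single contiguous bounce buffer.
+ * For DMA_TO_DEVICE the sg data is copied into the buffer up front.
+ * Returns the original sg list so that vhba_tear_bounce_buffer() can
+ * restore it later, or NULL if the allocation fails.
+ */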
+struct scatterlist *vhba_setup_bounce_buffer(struct srb *sp)
+{
+       struct scatterlist *scat;
+       int nents;
+       int total_len = 0;
+       void *memp;
+       struct scatterlist *orig_sg = scsi_sglist(sp->cmd);
+
+       nents = scsi_sg_count(sp->cmd);
+       scat = scsi_sglist(sp->cmd);
+       total_len = scsi_bufflen(sp->cmd);
+
+       memp = (void *)__get_free_pages(GFP_ATOMIC,
+                                       max(2, get_order(total_len)));
+
+       if (!memp)
+               return NULL;
+
+       if (sp->cmd->sc_data_direction == DMA_TO_DEVICE)
+               sg_copy_to_buffer(scat, nents, memp, total_len);
+
+       /*
+        * XXX: revisit; we should not be mucking around with use_sg here
+        */
+       sp->use_sg_orig = scsi_sg_count(sp->cmd);
+       scsi_set_buffer(sp->cmd, memp);
+       sp->bounce_buffer = memp;
+       set_scsi_sg_count(sp->cmd, 0);
+       sp->bounce_buf_len = total_len;
+
+       return orig_sg;
+}
+
+void vhba_tear_bounce_buffer(struct srb *sp)
+{
+       int total_len;
+       void *memp;
+       int nents;
+       struct scatterlist *scat;
+
+       scsi_set_buffer(sp->cmd, sp->unaligned_sg);
+       set_scsi_sg_count(sp->cmd, sp->use_sg_orig);
+
+       nents = scsi_sg_count(sp->cmd);
+       scat = scsi_sglist(sp->cmd);
+       memp = sp->bounce_buffer;
+       total_len = sp->bounce_buf_len;
+
+       if (sp->cmd->sc_data_direction == DMA_FROM_DEVICE)
+               sg_copy_from_buffer(scat, nents, memp, total_len);
+
+       sp->bounce_buffer = NULL;
+       sp->bounce_buf_len = 0;
+       sp->unaligned_sg = NULL;
+
+       free_pages((unsigned long)memp, max(2, get_order(total_len)));
+}
+
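+/*
+ * Returns 1 if the sg list needs a bounce copy before FMR mapping:
+ * the first entry must end on a page boundary, every later entry must
+ * start page aligned, and all but the last must be a multiple of
+ * PAGE_SIZE in length.
+ */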
+int check_sg_alignment(struct srb *sp, struct scatterlist *sg)
+{
+       int i;
+       int ret = 0;
+       unsigned int sg_offset = SG_OFFSET(sg);
+
+       /*
+        * Check 8 byte alignment only for the first sg entry, since we
+        * can handle an offset for the first entry alone; the rest of
+        * the entries must be 4k (and thus also 8 byte) aligned.
+        */
+       if ((sg_offset + SG_LENGTH(sg)) % PAGE_SIZE) {
+               dprintk(TRC_UNALIGNED, NULL,
+                       "Need to copy. SG_LENGTH:%d/scsi_sg_count:%d\n",
+                       SG_LENGTH(sg), scsi_sg_count(sp->cmd));
+               ret = 1;
+               goto out;
+       }
+       SG_NEXT(sg);
+
+       /* Check from second entry */
+       for (i = 1; i < scsi_sg_count(sp->cmd); i++, SG_NEXT(sg)) {
+               sg_offset = SG_OFFSET(sg);
+               /* All intermediate sg ptrs should be page (4k) aligned */
+               if (sg_offset) {
+                       dprintk(TRC_UNALIGNED, NULL,
+                               "ptr %d in sg list needs copy len %d addr ",
+                                i, SG_LENGTH(sg));
+                       dprintk(TRC_UNALIGNED, NULL, "align %llu\n",
+                               (unsigned long long int)
+                               (sg_offset & (PAGE_SIZE - 1)));
+                       ret = 1;
+                       goto out;
+               }
+
+               if ((i != (scsi_sg_count(sp->cmd) - 1))
+                   && (SG_LENGTH(sg) % PAGE_SIZE)) {
+                       dprintk(TRC_UNALIGNED, NULL,
+                               "ptr %d in sg list needs copy len %d\n", i,
+                               SG_LENGTH(sg));
+                       ret = 1;
+                       goto out;
+               }
+
+       }
+out:
+       SG_RESET(sg);
+       return ret;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_align.h
new file mode 100644 (file)
index 0000000..19c51b1
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef _VHBA_ALIGN_H_
+#define _VHBA_ALIGN_H_
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"         /* Only for assert() */
+
+struct scatterlist *vhba_setup_bounce_buffer(struct srb *sp);
+void vhba_tear_bounce_buffer(struct srb *sp);
+int check_sg_alignment(struct srb *, struct scatterlist *);
+
+#endif
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_attr.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_attr.c
new file mode 100644 (file)
index 0000000..f2d5c8e
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *       - Redistributions of source code must retain the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer.
+ *
+ *       - Redistributions in binary form must reproduce the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer in the documentation and/or other materials
+ *             provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include "vhba_xsmp.h"
+
+static void vhba_get_host_port_id(struct Scsi_Host *shost)
+{
+}
+
+static void vhba_get_host_speed(struct Scsi_Host *shost)
+{
+       struct virtual_hba *vhba =
+           vhba_get_context_by_idr((u32) *(shost->hostdata));
+       u32 speed = FC_PORTSPEED_4GBIT;
+
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+               return;
+       }
+
+       /*
+        * Hard coded for now, but we need this info
+        * sent from the I/O card to us:
+        *
+        *      switch (vhba->speed) {
+        *      case OFC_SPEED_1GBIT:
+        *              speed = FC_PORTSPEED_1GBIT;
+        *              break;
+        *      case OFC_SPEED_2GBIT:
+        *              speed = FC_PORTSPEED_2GBIT;
+        *              break;
+        *      case OFC_SPEED_4GBIT:
+        *              speed = FC_PORTSPEED_4GBIT;
+        *              break;
+        *      default:
+        *              speed = FC_PORTSPEED_UNKNOWN;
+        *              break;
+        *      }
+        */
+       fc_host_speed(shost) = speed;
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_port_type(struct Scsi_Host *shost)
+{
+       fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+}
+
+static void vhba_get_host_port_state(struct Scsi_Host *shost)
+{
+       struct virtual_hba *vhba;
+       int link_state;
+
+       vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+               return;
+       }
+
+       link_state = atomic_read(&vhba->ha->link_state);
+       switch (link_state) {
+       case 0:
+               fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+               break;
+       case 1:
+               fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+               break;
+       case 2:
+               fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
+               break;
+       default:
+               fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
+               break;
+       }
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_symbolic_name(struct Scsi_Host *shost)
+{
+       struct virtual_hba *vhba;
+
+       vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+               return;
+       }
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_host_fabric_name(struct Scsi_Host *shost)
+{
+       struct virtual_hba *vhba;
+       u64 node_name;
+
+       vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Could not find vhba\n");
+               return;
+       }
+       node_name = vhba->cfg->wwn;
+       fc_host_fabric_name(shost) = node_name;
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_node_name(struct scsi_target *target)
+{
+       struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       struct fc_port *fc;
+
+       vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+       if (vhba == NULL) {
+               pr_err("Error: Could not find vhba for this command\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       list_for_each_entry(fc, &ha->disc_ports, list) {
+               if (fc->os_target_id == target->id) {
+                       fc_starget_node_name(target) =
+                           __be64_to_cpu(*(uint64_t *) fc->node_name);
+                       DEC_REF_CNT(vhba);
+                       return;
+               }
+       }
+       fc_starget_node_name(target) = -1;
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_port_name(struct scsi_target *target)
+{
+       struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       struct fc_port *fc;
+
+       vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+       if (vhba == NULL) {
+               pr_err("Error: Could not find vhba for this command\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       list_for_each_entry(fc, &ha->disc_ports, list) {
+               if (fc->os_target_id == target->id) {
+                       fc_starget_port_name(target) =
+                           __be64_to_cpu(*(uint64_t *) fc->port_name);
+                       DEC_REF_CNT(vhba);
+                       return;
+               }
+       }
+       fc_starget_port_name(target) = -1;
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_get_starget_port_id(struct scsi_target *target)
+{
+       struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       struct fc_port *fc;
+
+       vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+       if (vhba == NULL) {
+               pr_err("Error: Could not find vhba for this command\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       list_for_each_entry(fc, &ha->disc_ports, list) {
+               if (fc->os_target_id == target->id) {
+                       fc_starget_port_id(target) = fc->d_id.b.domain << 16 |
+                           fc->d_id.b.area << 8 | fc->d_id.b.al_pa;
+                       DEC_REF_CNT(vhba);
+                       return;
+               }
+       }
+       fc_starget_port_id(target) = -1;
+       DEC_REF_CNT(vhba);
+}
+
+static void vhba_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
+{
+       if (timeout)
+               rport->dev_loss_tmo = timeout + 5;
+       else
+               rport->dev_loss_tmo = 30;       /* Default value XXX revisit */
+}
+
+struct fc_host_statistics *vhba_get_fc_host_stats(struct Scsi_Host *shp)
+{
+       return NULL;
+}
+
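+/*
+ * FC transport template: exposes the vHBA host, rport and target
+ * attributes through the fc_transport sysfs interface.
+ */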
+struct fc_function_template vhba_transport_functions = {
+
+       .show_host_node_name = 1,
+       .show_host_port_name = 1,
+       .show_host_supported_classes = 1,
+       .show_host_supported_fc4s = 1,
+       .show_host_active_fc4s = 1,
+
+       .get_host_port_id = vhba_get_host_port_id,
+       .show_host_port_id = 1,
+       .get_host_speed = vhba_get_host_speed,
+       .show_host_speed = 1,
+       .get_host_port_type = vhba_get_host_port_type,
+       .show_host_port_type = 1,
+       .get_host_port_state = vhba_get_host_port_state,
+       .show_host_port_state = 1,
+       .get_host_symbolic_name = vhba_get_host_symbolic_name,
+       .show_host_symbolic_name = 1,
+
+       .dd_fcrport_size = sizeof(struct os_tgt),
+       .show_rport_supported_classes = 1,
+
+       .get_host_fabric_name = vhba_get_host_fabric_name,
+       .show_host_fabric_name = 1,
+       .get_starget_node_name = vhba_get_starget_node_name,
+       .show_starget_node_name = 1,
+       .get_starget_port_name = vhba_get_starget_port_name,
+       .show_starget_port_name = 1,
+       .get_starget_port_id = vhba_get_starget_port_id,
+       .show_starget_port_id = 1,
+       .set_rport_dev_loss_tmo = vhba_set_rport_loss_tmo,
+       .show_rport_dev_loss_tmo = 1,
+       .get_fc_host_stats = vhba_get_fc_host_stats,
+
+};
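+
+/*
+ * Illustrative sketch (not part of the original patch): a template like
+ * this is handed to fc_attach_transport() once during module init, and
+ * the scsi_transport_template it returns is what vhba_create() assigns
+ * to host->transportt.  The matching fc_release_transport() call belongs
+ * in module exit:
+ *
+ *     vhba_transport_template =
+ *             fc_attach_transport(&vhba_transport_functions);
+ *     if (!vhba_transport_template)
+ *             return -ENODEV;
+ */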
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_create.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_create.c
new file mode 100644 (file)
index 0000000..5e0c08f
--- /dev/null
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *       - Redistributions of source code must retain the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer.
+ *
+ *       - Redistributions in binary form must reproduce the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer in the documentation and/or other materials
+ *             provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * vhba_create.c
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_scsi_intf.h"
+
+#include <scsi/scsi_transport_fc.h>
+
+static u32 xg_vhba_mem_alloc(struct virtual_hba *);
+
+int vhba_create(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg)
+{
+       struct virtual_hba *vhba;
+       struct Scsi_Host *host;
+       struct scsi_xg_vhba_host *ha;
+       struct vhba_xsmp_msg *msg1;
+       uint32_t mtu;
+       u32 i;
+       int ret;
+       int vhba_xsmp_msg_len = sizeof(struct vhba_xsmp_msg);
+       enum vhba_xsmp_error_codes nack_code = VHBA_NACK_GENERAL_ERROR;
+
+       vhba = kzalloc(sizeof(struct virtual_hba), GFP_ATOMIC);
+       if (!vhba) {
+               eprintk(NULL, "vhba alloc failed\n");
+               vhba_xsmp_nack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len,
+                              VHBA_NACK_ALLOC_ERROR);
+               return 1;
+       }
+
+       atomic_set(&vhba->ref_cnt, 0);
+       atomic_set(&vhba->vhba_state, VHBA_STATE_NOT_ACTIVE);
+
+       init_waitqueue_head(&vhba->timer_wq);
+       init_waitqueue_head(&vhba->delete_wq);
+
+       ret = vhba_create_context(msg, vhba);
+
+       if (ret == 0) {
+               /*
+                * Duplicate vHBA, probably due to previous sync operation
+                */
+               dprintk(TRC_XSMP_ERRS, NULL,
+                       "VHBA with resource_id <0x%Lx> exists, ",
+                        msg->resource_id);
+               dprintk(TRC_XSMP_ERRS, NULL, "not installing\n");
+               vhba->xsmp_hndl = xsmp_hndl;
+               vhba_xsmp_ack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len);
+               vhba_xsmp_notify(xsmp_hndl, msg->resource_id,
+                                XSMP_VHBA_OPER_UP);
+               kfree(vhba);
+               return 0;
+       } else if (ret == -1) {
+               eprintk(NULL, "mem alloc failed\n");
+               nack_code = VHBA_NACK_ALLOC_ERROR;
+               goto err_ret_5;
+       } else if (ret != 1) {
+               eprintk(NULL, "Error: unable to create context [%s]\n",
+                       msg->vh_name);
+               goto err_ret_5;
+       }
+
+       mtu = msg->mtu;
+
+       if (mtu == 0)
+               mtu = 256;      /* 256KB */
+       if (mtu > 2040)         /* 2MB - 8KB */
+               mtu = 2040;
+       dprintk(TRC_XSMP, NULL, "mtu size=%d\n", mtu);
+
+       vhba_max_dsds_in_fmr = (mtu * 1024) / PAGE_SIZE;
+       vhba_max_fmr_pages = ((mtu * 1024) / PAGE_SIZE) + 2;
+       vhba_max_transfer_size = (mtu * 1024) / 512;
+
+       xg_vhba_driver_template.sg_tablesize = vhba_max_dsds_in_fmr;
+
+       if (vhba_max_transfer_size != VHBA_DEFAULT_TRANSFER_SIZE)
+               xg_vhba_driver_template.max_sectors = vhba_max_transfer_size;
+
+       host = scsi_host_alloc(&xg_vhba_driver_template, sizeof(int));
+
+       if (host == NULL) {
+               eprintk(NULL, "scsi host alloc failed\n");
+               nack_code = VHBA_NACK_ALLOC_ERROR;
+               goto err_ret_5;
+       }
+
+       ha = kzalloc(sizeof(struct scsi_xg_vhba_host), GFP_ATOMIC);
+       if (!ha) {
+               eprintk(NULL, "ha alloc failed\n");
+               nack_code = VHBA_NACK_ALLOC_ERROR;
+               goto err_ret_4;
+       }
+       ha->host = host;
+       ha->host_no = host->host_no;
+       sprintf(ha->host_str, "%ld", ha->host_no);
+
+       spin_lock_init(&ha->io_lock);
+
+       /* Initialize proc related counters */
+       ha->stats.io_stats.total_io_rsp = 0;
+       ha->stats.io_stats.total_read_reqs = 0;
+       ha->stats.io_stats.total_write_reqs = 0;
+       ha->stats.io_stats.total_task_mgmt_reqs = 0;
+       ha->stats.io_stats.total_read_mbytes = 0;
+       ha->stats.io_stats.total_write_mbytes = 0;
+       ha->stats.io_stats.total_copy_ios = 0;
+       ha->stats.io_stats.total_copy_page_allocs = 0;
+       ha->stats.io_stats.total_copy_page_frees = 0;
+
+       for (i = 0; i < VHBA_MAX_VH_Q_COUNT; i++) {
+               atomic_set(&ha->stats.io_stats.num_vh_q_reqs[i], 0);
+               atomic_set(&ha->stats.io_stats.vh_q_full_cnt[i], 0);
+       }
+
+       ha->ports = MAX_BUSES;
+       ha->request_q_length = REQUEST_ENTRY_CNT_24XX;
+       host->can_queue = vhba_max_q_depth;
+       if ((vhba_max_q_depth > 64) || (vhba_max_q_depth < 1)) {
+               /*
+                * Looks like a bogus value; fall back to the default
+                * (VHBA_MAX_VH_Q_DEPTH).
+                */
+               host->can_queue = VHBA_MAX_VH_Q_DEPTH;
+       }
+       ha->data_qp_handle = 0;
+       ha->control_qp_handle = 0;
+       atomic_set(&ha->qp_status, VHBA_QP_NOTCONNECTED);
+
+       for (i = 0; i < REQUEST_ENTRY_CNT_24XX; i++)
+               ha->send_buf_ptr[i] = NULL;
+
+       spin_lock_init(&ha->list_lock);
+       INIT_LIST_HEAD(&ha->disc_ports);
+       INIT_LIST_HEAD(&ha->defer_list);
+       atomic_set(&ha->periodic_def_cnt, 0);
+
+       dprintk(TRC_XSMP, NULL, "create_vhba: new vhba = %p\n", (void *)vhba);
+
+       *(host->hostdata) = (int)vhba->idr;
+       vhba->ha = ha;
+       ha->vhba = vhba;
+       ha->max_tgt_id = 0;
+       ha->max_targets = 0;
+       ha->tca_guid = be64_to_cpu(msg->tca_guid);
+       ha->tca_lid = be16_to_cpu(msg->tca_lid);
+
+       vhba->xsmp_hndl = xsmp_hndl;
+       vhba->scanned_once = 0;
+       vhba->scan_reqd = 0;
+       vhba->sync_needed = 0;
+       vhba->ha->sync_flag = 1;
+       vhba->reconn_try_cnt = 0;
+
+       xcpm_get_xsmp_session_info(xsmp_hndl, &vhba->xsmp_info);
+
+       if (msg->vhba_flag & 0x1)
+               dprintk(TRC_XSMP, NULL, "This is a boot vhba\n");
+       else
+               dprintk(TRC_XSMP, NULL, "This is a regular vhba\n");
+
+       if (xg_vhba_mem_alloc(vhba)) {
+               eprintk(vhba, "failure in xg_vhba_mem_alloc\n");
+               nack_code = VHBA_NACK_ALLOC_ERROR;
+               goto err_ret_2;
+       }
+
+       msg1 = kmalloc(sizeof(struct vhba_xsmp_msg), GFP_ATOMIC);
+
+       if (!msg1) {
+               eprintk(vhba, "kmalloc for vhba xsmp msg failed\n");
+               nack_code = VHBA_NACK_ALLOC_ERROR;
+               goto err_ret_1;
+       }
+
+       memcpy(msg1, msg, sizeof(struct vhba_xsmp_msg));
+       vhba->cfg = msg1;
+       ha->resource_id = msg->resource_id;
+       vhba->resource_id = msg->resource_id;
+       dprintk(TRC_INFO, vhba, "resource id is %Lx\n", msg->resource_id);
+       host->this_id = 255;
+       host->cmd_per_lun = cmds_per_lun;
+       host->max_cmd_len = MAX_CMDSZ;
+       host->max_channel = ha->ports - 1;
+       if (vhba->cfg->lunmask_enable)
+               ha->max_luns = 256;
+       else
+               ha->max_luns = MAX_FIBRE_LUNS_MORE;
+       host->max_lun = ha->max_luns;
+       host->unique_id = ha->instance;
+       dprintk(TRC_XSMP, vhba,
+               "detect hba %ld at address = %p\n", ha->host_no, ha);
+
+       /* Use the VMware-consistent naming convention and register the
+        * device as an FC-capable transport.  The FC-transport template
+        * must be pre-registered, typically during module init. */
+       host->transportt = vhba_transport_template;
+       host->max_channel = 0;
+       host->max_lun = MAX_LUNS - 1;   /*0xffff-1 */
+       host->max_id = MAX_TARGETS;
+       ha->flags.init_done = 1;
+       ret = scsi_add_host(host, NULL);
+       if (ret) {
+               pr_err("scsi_add_host failed: ret = %d\n", ret);
+               goto err_ret_1;
+       }
+
+       {
+               u64 port_name = wwn_to_u64((u8 *) &(vhba->cfg->wwn));
+               /* Hard-coding the node name isn't right, but do it for now */
+               u64 node_name = port_name | 0x100000000;
+
+               fc_host_node_name(host) = node_name;
+               fc_host_port_name(host) = port_name;
+
+       }
+
+       if (vhba_initialize(vhba, msg1)) {
+               eprintk(vhba, "scsi(%ld): failed to initialize adapter, "
+                       "flags %x\n", ha->host_no, ha->device_flags);
+               goto err_ret_0;
+       }
+
+       vhba_xsmp_ack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len);
+       sprintf((char *)ha->vhba_name, "vhba:%p", vhba);
+       vhba_add_proc_entry(vhba);
+       vhba_add_target_proc_entry(vhba);
+       vhba_add_context(vhba);
+
+       /* Any VHBA context setting, data & control IB queue pairs, etc.. */
+       ret = vhba_conn_init(vhba);
+       if (ret) {
+               eprintk(vhba, "Trouble doing Conn Init. Returning %d\n", ret);
+               vhba_remove_context(vhba->resource_id);
+               goto err_ret_0;
+       }
+       ret = vhba_ib_connect_qp(vhba);
+       if (ret) {
+               eprintk(vhba, "Trouble Setting up Conn. Returning %d\n", ret);
+               vhba_remove_context(vhba->resource_id);
+               goto err_ret_0;
+       }
+       atomic_inc(&vhba_count);
+       return 0;
+
+err_ret_0:
+       scsi_host_put(host);
+err_ret_1:
+       kfree(vhba->cfg);
+err_ret_2:
+       kfree(ha);
+err_ret_4:
+err_ret_5:
+       kfree(vhba);
+       vhba_xsmp_nack(xsmp_hndl, (u8 *) msg, vhba_xsmp_msg_len, nack_code);
+       return -1;
+}
+
+void xg_vhba_free_device(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       if (ha->request_ring) {
+               ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+                                    ha->request_q_length *
+                                    sizeof(struct cmd_type_7),
+                                    ha->request_ring, ha->request_dma);
+               dprintk(TRC_XSMP, vhba,
+                       "called ib_dma_free_coherent for req ring\n");
+       } else
+               dprintk(TRC_XSMP_ERRS, vhba, "request ring already NULL!\n");
+
+       if (ha->req_ring_rindex) {
+               ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+                                    sizeof(u32), ha->req_ring_rindex,
+                                    ha->req_ring_rindex_dma);
+               dprintk(TRC_XSMP, vhba,
+                       "called dma_free_coherent for req ring rindex\n");
+       } else
+               dprintk(TRC_XSMP_ERRS, vhba, "request ring ptr already NULL\n");
+}
+
+int get_outstding_cmd_entry(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int start_cnt = ha->current_outstanding_cmd;
+       int curr_cnt = ha->current_outstanding_cmd;
+
+       while (curr_cnt < MAX_OUTSTANDING_COMMANDS) {
+               if (ha->outstanding_cmds[curr_cnt] == NULL) {
+                       ha->current_outstanding_cmd = curr_cnt;
+                       return curr_cnt;
+               }
+               curr_cnt++;
+       }
+
+       ha->stats.ib_stats.total_outstding_q_wraps++;
+       curr_cnt = 0;
+       while (curr_cnt < start_cnt) {
+               if (ha->outstanding_cmds[curr_cnt] == NULL) {
+                       ha->current_outstanding_cmd = curr_cnt;
+                       return curr_cnt;
+               }
+               curr_cnt++;
+       }
+
+       ha->stats.ib_stats.total_req_q_fulls++;
+       return -1;
+}
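+
+/*
+ * Usage sketch (illustrative, not from the original patch): callers
+ * reserve a slot before building an IOCB, and the slot is recycled when
+ * the response carrying this index completes:
+ *
+ *     int idx = get_outstding_cmd_entry(vhba);
+ *     if (idx < 0)
+ *             return 1;
+ *     ha->outstanding_cmds[idx] = sp;
+ *
+ * A negative return means both scan passes found no free entry (ring
+ * full), which the stats above count as a request-queue-full event.
+ */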
+
+static u32 xg_vhba_mem_alloc(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       ha->request_ring = ib_dma_alloc_coherent(vhba->xsmp_info.ib_device,
+                                                ha->request_q_length *
+                                                sizeof(struct cmd_type_7),
+                                                &ha->request_dma, GFP_KERNEL);
+       if (ha->request_ring == NULL) {
+               eprintk(vhba, "alloc failed for req ring\n");
+               return 1;
+       }
+
+       ha->req_ring_rindex = ib_dma_alloc_coherent(vhba->xsmp_info.ib_device,
+                                                   sizeof(u32),
+                                                   &ha->req_ring_rindex_dma,
+                                                   GFP_KERNEL);
+       if (ha->req_ring_rindex == NULL) {
+               ib_dma_free_coherent(vhba->xsmp_info.ib_device,
+                                    ha->request_q_length *
+                                    sizeof(struct cmd_type_7),
+                                    ha->request_ring, ha->request_dma);
+               eprintk(vhba, "alloc failed for req ring rindex\n");
+               return 1;
+       }
+       return 0;
+}
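+
+/*
+ * Note: both buffers above are DMA-coherent because the remote end works
+ * on them directly -- the request ring holds the cmd_type_7 IOCBs and
+ * req_ring_rindex is the ring's read index.  Their bus addresses and the
+ * MR rkey are advertised to the TCA in the init block built by
+ * vhba_send_init_blk() in vhba_ib.c.
+ */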
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_defs.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_defs.h
new file mode 100644 (file)
index 0000000..a97b8c6
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_DEFS_H__
+#define __VHBA_DEFS_H__
+
+#include <linux/types.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+
+#define VHBA_VALIDATE_STATE(vhba)                              \
+{                                                              \
+       if (atomic_read(&vhba->ha->qp_status) != VHBA_QP_CONNECTED) {   \
+               dprintk(0, vhba,                                \
+                       "Error - QPs not connected!\n");        \
+               ret_error = 1;                                  \
+       }                                                       \
+}
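+
+/*
+ * Usage sketch (illustrative): the macro expects a local 'ret_error'
+ * flag in the calling function, e.g.:
+ *
+ *     int ret_error = 0;
+ *
+ *     VHBA_VALIDATE_STATE(vhba);
+ *     if (ret_error)
+ *             return SCSI_MLQUEUE_HOST_BUSY;
+ */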
+
+extern int vhba_abort_recovery_count;
+extern struct scsi_transport_template *vhba_transport_template;
+extern int vhba_max_dsds_in_fmr;
+extern int vhba_max_fmr_pages;
+extern int hba_offset;
+extern int force_sp_copy;
+extern int vhba_use_fmr;
+extern int boot_vhba_use_fmr;
+extern struct scsi_host_template xg_vhba_driver_template;
+extern int cmds_per_lun;
+extern int vhba_max_transfer_size;
+extern int vhba_max_scsi_retry;
+extern int vhba_initialize(struct virtual_hba *vhba, struct vhba_xsmp_msg *msg);
+extern int vhba_add_proc_entry(struct virtual_hba *vhba);
+extern int vhba_add_target_proc_entry(struct virtual_hba *vhba);
+extern int vhba_remove_target_proc_entry(struct virtual_hba *vhba);
+extern void vhba_remove_proc_entry(struct virtual_hba *vhba);
+extern void add_to_defer_list(struct scsi_xg_vhba_host *ha, struct srb *sp);
+extern int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr,
+                           int num_pgs, u64 *mapped_fmr_iova, struct srb *sp,
+                           int index);
+extern void extend_timeout(struct scsi_cmnd *cmd, struct srb *sp, int timeout);
+extern void ib_link_down(struct scsi_xg_vhba_host *ha);
+extern void ib_link_dead_poll(struct scsi_xg_vhba_host *ha);
+extern int vhba_send_heart_beat(struct virtual_hba *vhba);
+extern int check_number_of_vhbas_provisioned(void);
+extern int vhba_check_discs_status(void);
+
+int vhba_create_procfs_root_entries(void);
+void vhba_remove_procfs_root_entries(void);
+ssize_t vhba_read(struct file *, char *, size_t, loff_t *);
+ssize_t vhba_write(struct file *, const char *, size_t, loff_t *);
+int vhba_open(struct inode *, struct file *);
+int vhba_release(struct inode *, struct file *);
+int vhba_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
+
+void vhba_internal_processing(void);
+
+/*
+ * Globals
+ */
+extern struct semaphore vhba_init_sem;
+extern int vhba_ready;
+extern struct timer_list vhba_init_timer;
+extern int vhba_init_timer_on;
+
+extern struct semaphore vhba_cmd_sem;
+extern int vhba_cmd_done;
+extern struct timer_list vhba_cmd_timer;
+extern int vhba_cmd_timer_on;
+
+extern int bench_target_count;
+extern int vhba_multiple_q;
+
+#define VHBA_RECONN_INTERVAL 5
+#define MAX_IOCBS_IN_VH 2
+
+extern struct proc_dir_entry *proc_root_vhba;
+extern struct proc_dir_entry *proc_root_vhba_dev;
+extern struct proc_dir_entry *proc_root_vhba_targ;
+
+int vhba_print_io_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_ib_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_xsmp_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fmr_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_fc_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+int vhba_print_scsi_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha);
+
+extern char vhba_version_str[40];
+extern int vhba_xsmp_service_id;
+extern struct service_type_info service_info;
+extern struct vhba_xsmp_stats vhba_xsmp_stats;
+
+extern int init_status;
+extern int dev_major;
+extern struct vhba_discovery_msg disc_info;
+extern struct vhba_io_cmd vhba_io_cmd_o;
+
+void xg_vhba_free_device(struct virtual_hba *);
+int vhba_send_init_blk(struct virtual_hba *);
+int vhba_send_enable_vhba(struct virtual_hba *);
+int vhba_send_vhba_write_index(int);
+int send_abort_command(int, struct srb *sp, unsigned int t);
+int send_device_reset(int, unsigned int t);
+int send_link_reset(int);
+int vhbawq_init(void);
+int vhbawq_queue(void);
+int vhbawq_cleanup(void);
+
+#endif /* __VHBA_DEFS_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_delete.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_delete.c
new file mode 100644 (file)
index 0000000..576360d
--- /dev/null
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *      Redistribution and use in source and binary forms, with or
+ *      without modification, are permitted provided that the following
+ *      conditions are met:
+ *
+ *       - Redistributions of source code must retain the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer.
+ *
+ *       - Redistributions in binary form must reproduce the above
+ *             copyright notice, this list of conditions and the following
+ *             disclaimer in the documentation and/or other materials
+ *             provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+
+#include "vhba_ib.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_scsi_intf.h"
+
+/* Get the driver IO-Lock for use here. */
+
+int vhba_delete(u64 resource_id)
+{
+       int i = 0, j = 0;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha = NULL;
+
+       vhba = vhba_remove_context(resource_id);
+
+       if (vhba == NULL) {
+               dprintk(TRC_XSMP_ERRS, NULL, "Non existent vhba\n");
+               return -EINVAL;
+       }
+
+       ha = vhba->ha;
+
+       /* Flush deferred list */
+       if (atomic_read(&ha->ib_status) == VHBA_IB_DOWN) {
+               atomic_set(&ha->ib_link_down_cnt, 0);
+               atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+       }
+
+       dprintk(TRC_XSMP, vhba, "disconnecting qps for vhba %p\n", vhba);
+       vhba_ib_disconnect_qp(vhba);
+       dprintk(TRC_XSMP, vhba, "purging ios for vhba %p\n", vhba);
+       vhba_purge_pending_ios(vhba);
+
+       dprintk(TRC_XSMP, vhba, "uniniting QP connections\n");
+       xscore_conn_destroy(&vhba->ctrl_conn.ctx);
+       xscore_conn_destroy(&vhba->data_conn.ctx);
+
+       atomic_set(&vhba->vhba_state, VHBA_STATE_NOT_ACTIVE);
+
+       if (atomic_read(&vhba->ref_cnt)) {
+               int err;
+
+               dprintk(TRC_XSMP, NULL, "%s(): vhba %p has ref_cnt %d,\n"
+                       "waiting on...\n",
+                       __func__, vhba, atomic_read(&vhba->ref_cnt));
+
+               err = wait_event_timeout(vhba->delete_wq,
+                                        !atomic_read(&vhba->ref_cnt), 30 * HZ);
+               if (err == 0) {
+                       eprintk(vhba, "vhba_delete: ref_cnt %d is non zero\n",
+                               atomic_read(&vhba->ref_cnt));
+                       return -EIO;
+               }
+       }
+
+       dprintk(TRC_XSMP, NULL, "setting oper state dn\n");
+       vhba_xsmp_notify(vhba->xsmp_hndl,
+                        vhba->resource_id, XSMP_VHBA_OPER_DOWN);
+
+       vhba_remove_proc_entry(vhba);
+       vhba_remove_target_proc_entry(vhba);
+
+       for (i = 0; i < REQUEST_ENTRY_CNT_24XX; i++) {
+               if (ha->send_buf_ptr[i] != NULL) {
+                       kfree(ha->send_buf_ptr[i]);
+                       ha->send_buf_ptr[i] = NULL;
+               }
+       }
+       xg_vhba_free_device(vhba);
+       vhba_scsi_release(vhba);
+
+       /*
+        * Free memory allocated for tgts/lun's etc.
+        */
+       for (i = 0; i < MAX_FIBRE_TARGETS; i++) {
+               if (TGT_Q(ha, i)) {
+                       dprintk(TRC_XSMP, NULL, "freeing tgt %d\n", i);
+                       for (j = 0; j < MAX_FIBRE_LUNS; j++) {
+                               if (LUN_Q(ha, i, j)) {
+                                       dprintk(TRC_XSMP,
+                                               NULL, "freeing lun %d\n", j);
+                                       if (LUN_Q(ha, i, j)->fclun) {
+                                               kfree(LUN_Q(ha, i, j)->fclun);
+                                               LUN_Q(ha, i, j)->fclun = NULL;
+                                       }
+                                       kfree(LUN_Q(ha, i, j));
+                                       LUN_Q(ha, i, j) = NULL;
+                               }
+                       }       /* end free all lun's under the tgt */
+                       if (TGT_Q(ha, i)->fcport) {
+                               kfree(TGT_Q(ha, i)->fcport);
+                               TGT_Q(ha, i)->fcport = NULL;
+                       }
+                       kfree(TGT_Q(ha, i));
+                       TGT_Q(ha, i) = NULL;
+               }
+
+       }                       /* end free all tgts */
+
+       kfree(ha);
+       kfree(vhba);
+       atomic_dec(&vhba_count);
+
+       return 0;
+}                              /* vhba_delete() */
+
+int vhba_scsi_release(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       dprintk(TRC_XSMP, vhba, "deleting scsi host for vhba %p\n", vhba);
+
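+       /* The FC transport requires fc_remove_host() to run before
+        * scsi_remove_host(), so the transport attributes are torn
+        * down while the Scsi_Host is still registered. */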
+       fc_remove_host(ha->host);
+
+       scsi_remove_host(ha->host);
+
+       scsi_host_put(ha->host);
+
+       vhba_dealloc_fmr_pool(vhba);
+
+       if (vhba->cfg != NULL) {
+               kfree(vhba->cfg);
+               vhba->cfg = NULL;
+       }
+       return 0;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.c
new file mode 100644 (file)
index 0000000..a209b2a
--- /dev/null
@@ -0,0 +1,695 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ *  - Redistributions of source code must retain the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer.
+ *
+ *  - Redistributions in binary form must reproduce the above
+ *    copyright notice, this list of conditions and the following
+ *    disclaimer in the documentation and/or other materials
+ *    provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/delay.h>
+
+#include <linux/version.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <rdma/ib_verbs.h>
+
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+
+void vhba_data_send_comp_handler(void *client_arg, void *msg, int status,
+                                int n);
+
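+/*
+ * Called from both the control and the data QP event handlers on their
+ * CONNECTED events, so the state advances in two steps: RECONNECTING ->
+ * PARTIAL_CONNECT when the first QP comes up, then PARTIAL_CONNECT ->
+ * CONNECTED when the second does, at which point the init block and the
+ * enable-vHBA message are sent.
+ */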
+void vhba_connection_setup(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int qp_status, ret;
+
+       qp_status = atomic_read(&ha->qp_status);
+
+       switch (qp_status) {
+       case VHBA_QP_RECONNECTING:
+               atomic_set(&ha->qp_status, VHBA_QP_PARTIAL_CONNECT);
+               break;
+       case VHBA_QP_PARTIAL_CONNECT:
+               atomic_set(&ha->qp_status, VHBA_QP_CONNECTED);
+               dprintk(TRC_INFO, vhba, "QP is connected\n");
+               vhba->reconn_attempt = 0;
+               vhba->qp_count++;
+               atomic_set(&ha->ib_status, VHBA_IB_UP);
+               dprintk(TRC_IB, vhba, "setting oper state up\n");
+               vhba_xsmp_notify(vhba->xsmp_hndl,
+                                vhba->resource_id, XSMP_VHBA_OPER_UP);
+               break;
+       default:
+               eprintk(vhba,
+                       "Error - Unexpected QP state detected %d\n", qp_status);
+               return;
+       }                       /* end switch */
+
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               dprintk(TRC_INFO, vhba, "sending init blk\n");
+               ret = vhba_send_init_blk(vhba);
+               if (ret)
+                       eprintk(vhba, "sending init blk failed\n");
+               dprintk(TRC_INFO, vhba, "sending enable vhba\n");
+               ret = vhba_send_enable_vhba(vhba);
+               if (ret)
+                       eprintk(vhba, "sending enable vhba failed\n");
+       }
+}
+
+void vhba_control_callback(void *context, int event)
+{
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = (struct virtual_hba *)vhba_get_context_by_idr((u32)
+                                                            (unsigned long)
+                                                            context);
+       if (!vhba) {
+               eprintk(NULL, "Invalid context\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       switch (event) {
+       case XSCORE_CONN_RDISCONNECTED:
+       case XSCORE_CONN_LDISCONNECTED:
+               dprintk(TRC_IB, vhba, "Received Control Disconnect\n");
+               ha->stats.ib_stats.cqp_remote_disconn_cnt++;
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               break;
+       case XSCORE_CONN_CONNECTED:
+               dprintk(TRC_IB, vhba, "Control Is Connected\n");
+               ha->stats.ib_stats.cqp_up_cnt++;
+               ha->control_qp_handle = XSCORE_CONN_CONNECTED;
+               vhba_connection_setup(vhba);
+               break;
+       case XSCORE_CONN_ERR:
+               ib_link_down(ha);
+               ha->control_qp_handle = XSCORE_CONN_ERR;
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               break;
+       default:
+               break;
+       }
+       DEC_REF_CNT(vhba);
+
+}
+
+void vhba_data_callback(void *context, int event)
+{
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = (struct virtual_hba *)vhba_get_context_by_idr((u32)
+                                                            (unsigned long)
+                                                            context);
+       if (!vhba) {
+               eprintk(NULL, "Invalid COntext\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       switch (event) {
+       case XSCORE_CONN_RDISCONNECTED:
+       case XSCORE_CONN_LDISCONNECTED:
+               dprintk(TRC_IB, vhba, "Received Data Disconnect\n");
+               ha->stats.ib_stats.dqp_remote_disconn_cnt++;
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               break;
+       case XSCORE_CONN_CONNECTED:
+               dprintk(TRC_IB, vhba, "Data Connected\n");
+               ha->data_qp_handle = XSCORE_CONN_CONNECTED;
+               vhba_connection_setup(vhba);
+               break;
+       case XSCORE_CONN_ERR:
+               ib_link_down(ha);
+               ha->data_qp_handle = XSCORE_CONN_ERR;
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               break;
+       default:
+               break;
+       }
+       DEC_REF_CNT(vhba);
+
+}
+
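+/*
+ * Sets up the control and data xscore connections for a vHBA.  The CM
+ * private data posted with each connect carries the vHBA resource id
+ * and the QP type, which is how the remote end tells the two queue
+ * pairs of one vHBA apart.
+ */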
+int vhba_conn_init(struct virtual_hba *vhba)
+{
+       struct xsvhba_conn *cp = &vhba->ctrl_conn;
+       struct xscore_conn_ctx *cctx = &cp->ctx;
+       struct xt_cm_private_data *cmp;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int ret;
+
+       /*
+        * Control connection
+        */
+       cp->type = QP_TYPE_CONTROL;
+       cctx->tx_ring_size = 8;
+       cctx->rx_ring_size = 8;
+       cctx->rx_buf_size = VHBA_CQP_MAX_BUF_SIZE;
+       cctx->client_arg = (void *)(unsigned long)(vhba->idr);
+       cctx->event_handler = vhba_control_callback;
+       cctx->alloc_buf = 0;
+       cctx->free_buf = 0;
+       cctx->send_compl_handler = vhba_ctrl_send_comp_handler;
+       cctx->recv_msg_handler = vhba_cqp_recv_comp_handler;
+       cctx->dguid = ha->tca_guid;
+       cctx->dlid = ha->tca_lid;
+       cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+       cctx->features = XSCORE_DONT_FREE_SENDBUF;
+
+       cmp = (struct xt_cm_private_data *)cctx->priv_data;
+       cmp->vid = cpu_to_be64(vhba->resource_id);
+       cmp->qp_type = cpu_to_be16(QP_TYPE_CONTROL);
+       cmp->data_qp_type = 0;
+       cctx->priv_data_len = sizeof(*cmp);
+
+       ret = xscore_conn_init(cctx, vhba->xsmp_info.port);
+       if (ret) {
+               eprintk(vhba, "xscore_conn_init ctrl error for VID %llx %d\n",
+                       vhba->resource_id, ret);
+               return ret;
+       }
+
+       cp = &vhba->data_conn;
+       cctx = &cp->ctx;
+
+       cp->type = QP_TYPE_DATA;
+       cctx->tx_ring_size = VHBA_DQP_SEND_Q_SZ;
+       cctx->rx_ring_size = VHBA_DQP_RECV_Q_SZ;
+       cctx->rx_buf_size = VHBA_DQP_MAX_BUF_SIZE;
+       cctx->client_arg = (void *)(unsigned long)(vhba->idr);
+       cctx->event_handler = vhba_data_callback;
+       cctx->alloc_buf = 0;
+       cctx->free_buf = 0;
+       cctx->send_compl_handler = vhba_data_send_comp_handler;
+       cctx->recv_msg_handler = vhba_recv_comp_handler;
+       cctx->dguid = ha->tca_guid;
+       cctx->dlid = ha->tca_lid;
+       cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+       cctx->features = XSCORE_RDMA_SUPPORT | XSCORE_DONT_FREE_SENDBUF;
+
+       cmp = (struct xt_cm_private_data *)cctx->priv_data;
+       cmp->vid = cpu_to_be64(vhba->resource_id);
+       cmp->qp_type = cpu_to_be16(QP_TYPE_DATA);
+       cmp->data_qp_type = 0;
+       cctx->priv_data_len = sizeof(*cmp);
+
+       ret = xscore_conn_init(cctx, vhba->xsmp_info.port);
+       if (ret) {
+               eprintk(vhba, "xscore_conn_init data error for VID %llx %d\n",
+                       vhba->resource_id, ret);
+
+               xscore_conn_destroy(&vhba->ctrl_conn.ctx);
+       }
+       return ret;
+}
+
+int vhba_ib_connect_qp(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       int ret = 0;
+
+       /* Create Control queue pair with the destination TCA */
+       if ((atomic_read(&ha->qp_status) == VHBA_QP_PARTIAL_CONNECT) ||
+           (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED)) {
+               dprintk(TRC_IB_ERRS, vhba, "Error - Invalid qp state: %d\n",
+                       atomic_read(&ha->qp_status));
+               ret = 1;
+               goto out;
+       }
+
+       atomic_set(&ha->qp_status, VHBA_QP_RECONNECTING);
+
+       ret = xscore_conn_connect(&vhba->data_conn.ctx, 0);
+
+       if (ret) {
+               eprintk(vhba, "Data QP Connect failed: ret = %d\n", ret);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               goto out;
+       }
+
+       ret = xscore_conn_connect(&vhba->ctrl_conn.ctx, 0);
+
+       if (ret) {
+               eprintk(vhba, "Control QP Connect failed: ret = %d\n", ret);
+               xscore_conn_disconnect(&vhba->data_conn.ctx, 0);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               goto out;
+       }
+       ret = 0;
+out:
+       return ret;
+
+}
+
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       if (ha->control_qp_handle == XSCORE_CONN_CONNECTED) {
+               dprintk(TRC_IB, vhba, "Disconnecting Control\n");
+               xscore_conn_disconnect(&vhba->ctrl_conn.ctx, 0);
+       }
+
+       if (ha->data_qp_handle == XSCORE_CONN_CONNECTED) {
+               dprintk(TRC_IB, vhba, "Disconnecting Data\n");
+               xscore_conn_disconnect(&vhba->data_conn.ctx, 0);
+       }
+
+       atomic_set(&ha->qp_status, VHBA_QP_NOTCONNECTED);
+       return 0;
+}
+
+int vhba_alloc_fmr_pool(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct ib_device_attr dev_attr;
+       int ret;
+       int page_shift = 0;
+       struct ib_fmr_pool_param pool_params = {
+               .max_pages_per_fmr = vhba_max_fmr_pages,
+               .access = IB_ACCESS_LOCAL_WRITE |
+                   IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE,
+               .pool_size = VHBA_FMR_POOL_SIZE,
+               .dirty_watermark = VHBA_FMR_DIRTY_MARK,
+               .flush_function = 0,
+               .flush_arg = 0,
+               .cache = 1
+       };
+
+       ret = ib_query_device(vhba->xsmp_info.ib_device, &dev_attr);
+       if (ret) {
+               eprintk(vhba, "query_device error %d\n", ret);
+               return -1;
+       }
+
+       page_shift = ffs(dev_attr.page_size_cap) - 1;
+       if (page_shift < 0) {
+               /* Fall back to the CPU page size, expressed as a shift */
+               page_shift = PAGE_SHIFT;
+               dprintk(TRC_IB_ERRS, vhba,
+                       "ib_query_device returned a page_size of 0\n");
+       }
+       page_shift = max(12, page_shift);
+
+       dprintk(TRC_IB, vhba, "Using page shift: %d\n", page_shift);
+
+       pool_params.page_shift = page_shift;
+
+       /*
+        * Allocate an fmr pool, assuming that the pd has been obtained
+        * before the call
+        */
+       ha->fmr_pool = ib_create_fmr_pool(vhba->xsmp_info.pd, &pool_params);
+
+       if (IS_ERR(ha->fmr_pool) || (!ha->fmr_pool)) {
+               ha->fmr_pool = NULL;
+               dprintk(TRC_IB_ERRS, vhba, "ib_create_fmr_pool failed\n");
+               return -1;
+       }
+       return 0;
+}
+
+void vhba_dealloc_fmr_pool(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       if (ha->fmr_pool) {
+               ib_destroy_fmr_pool(ha->fmr_pool);
+               ha->fmr_pool = NULL;
+       } else {
+               dprintk(TRC_IB_ERRS, vhba, "fmr pool ptr is null!\n");
+       }
+}
+
+int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr, int num_pgs,
+                    u64 *mapped_fmr_iova, struct srb *sp, int index)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       if (!ha->fmr_pool) {
+               eprintk(vhba, "Error - null fmr pool ptr\n");
+               ha->stats.fmr_stats.map_fail_cnt++;
+               /* Revisit: Correct return value is -1 */
+               return 0;
+       }
+       sp->pool_fmr[index] = ib_fmr_pool_map_phys(ha->fmr_pool,
+                                                  phys_addr, num_pgs,
+                                                  *mapped_fmr_iova, NULL);
+
+       if (IS_ERR(sp->pool_fmr[index])) {
+               eprintk(vhba, "Error - pool fmr index map failed [%ld/%p]\n",
+                       PTR_ERR(sp->pool_fmr[index]),
+                       sp->pool_fmr[index]);
+               ha->stats.fmr_stats.map_fail_cnt++;
+               return -1;
+       }
+       ha->stats.fmr_stats.map_cnt++;
+       return 0;
+}
+
+void vhba_unmap_buf_fmr(struct virtual_hba *vhba, struct srb *sp, int tot_dsds)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int index;
+
+       for (index = 0; index < tot_dsds; index++) {
+               if (sp->pool_fmr[index]) {
+                       ib_fmr_pool_unmap(sp->pool_fmr[index]);
+                       sp->pool_fmr[index] = 0;
+               }
+       }
+       ha->stats.fmr_stats.unmap_cnt++;
+}
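+
+/*
+ * Pairing note: vhba_map_buf_fmr() is called once per data segment of an
+ * srb and records the mapping in sp->pool_fmr[index]; on completion the
+ * whole set is released with a single vhba_unmap_buf_fmr(vhba, sp,
+ * tot_dsds) call, which walks the same indices.
+ */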
+
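+/*
+ * Advertises the request ring to the TCA: the ring's bus address, entry
+ * size and count, the read-index location, and the rkeys needed to reach
+ * them remotely are packed into an init_block and posted on the data QP.
+ */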
+int vhba_send_init_blk(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int ret;
+       struct init_block *init_blk;
+
+       vhba_init_rings(vhba);
+
+       if (!ha->request_ring) {
+               eprintk(vhba, "Error - null req ring ptr. Returning 1\n");
+               return 1;
+       }
+
+       init_blk = &ha->init_blk;
+
+       memset(init_blk, 0, sizeof(struct init_block));
+
+       init_blk->type = INIT_BLOCK;
+       init_blk->entry_size = sizeof(struct cmd_type_7);
+       init_blk->ring_size = ha->request_q_length;
+       init_blk->read_index_addr = ha->req_ring_rindex_dma;
+       init_blk->read_index_rkey = vhba->xsmp_info.mr->rkey;
+       init_blk->base_addr = ha->request_dma;
+       init_blk->base_addr_rkey = vhba->xsmp_info.mr->rkey;
+
+       dprintk(TRC_IB, vhba, "base (%Lx), rkey (%0x)\n",
+               init_blk->base_addr, init_blk->base_addr_rkey);
+       dprintk(TRC_IB, vhba, "read (%Lx), rrkey (%0x)\n",
+               init_blk->read_index_addr, init_blk->read_index_rkey);
+       dprintk(TRC_IB, vhba, "ring (%0x), entry (%0x)\n",
+               init_blk->ring_size, init_blk->entry_size);
+
+/* Init block index is 2048 (not overlapping with write_index 0 - 1023) */
+
+       ret = xscore_post_send(&vhba->data_conn.ctx, (u8 *) init_blk,
+                              sizeof(struct init_block), XSCORE_DEFER_PROCESS);
+
+       if (ret) {
+               eprintk(vhba, "xscore_post_send() failed\n");
+               ha->stats.ib_stats.dqp_send_err_cnt++;
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               return 1;
+       }
+       return 0;
+}
+
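+/*
+ * After new IOCBs have been placed in the request ring, this posts a
+ * WRITE_INDEX_UPDATE carrying req_ring_windex so the remote side knows
+ * how far the ring has been filled; the init block already gave it the
+ * ring's base address and rkey to fetch the entries.
+ */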
+int vhba_send_write_index(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct vhba_write_index_msg *send_write_index_msg = NULL;
+       int ret;
+
+       if ((ha->data_qp_handle == XSCORE_CONN_ERR) ||
+           (ha->control_qp_handle == XSCORE_CONN_ERR)) {
+               dprintk(TRC_IB_ERRS, vhba, "IB handle is -1\n");
+               return 1;
+       }
+       if ((ha->req_ring_windex < 0) ||
+           (ha->req_ring_windex >= ha->request_q_length)) {
+               eprintk(vhba, "Error - invalid req_ring_windex %d in %s\n",
+                       ha->req_ring_windex, __func__);
+               return 1;
+       }
+
+       if (!ha->send_buf_ptr[ha->req_ring_windex]) {
+               ha->send_buf_ptr[ha->req_ring_windex] =
+                   kmalloc(sizeof(struct vhba_write_index_msg), GFP_ATOMIC);
+               if (!ha->send_buf_ptr[ha->req_ring_windex]) {
+                       eprintk(vhba, "Error - kmalloc failed!\n");
+                       return 1;
+               }
+       }
+
+       send_write_index_msg = ha->send_buf_ptr[ha->req_ring_windex];
+       ha->send_write_index_msg = send_write_index_msg;
+
+       if (!send_write_index_msg) {
+               eprintk(vhba, "Error - null send write index msg ptr\n");
+               return 1;
+       }
+
+       send_write_index_msg->type = WRITE_INDEX_UPDATE;
+       send_write_index_msg->_reserved1 = 0x0;
+       send_write_index_msg->_reserved = 0x0;
+       send_write_index_msg->write_index = ha->req_ring_windex;
+
+       ret = xscore_post_send(&vhba->data_conn.ctx,
+                              (u8 *) send_write_index_msg,
+                              sizeof(struct vhba_write_index_msg),
+                              XSCORE_DEFER_PROCESS);
+       if (ret) {
+               eprintk(vhba, "Error - xsigo ib send msg failed?\n");
+               send_write_index_msg = 0;
+               ha->stats.ib_stats.dqp_send_err_cnt++;
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               ib_link_down(ha);
+               return 1;
+       }
+
+       return 0;
+}
+
+int vhba_send_heart_beat(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct heart_beat_msg *hb_msg;
+       int ret = 0;
+
+       dprintk(TRC_FUNCS, vhba, "Entering...\n");
+
+       if (atomic_read(&ha->qp_status) != VHBA_QP_CONNECTED)
+               return 1;
+
+       if (atomic_read(&ha->ib_status) != VHBA_IB_UP)
+               return 1;
+
+       dprintk(TRC_IB, vhba, "handle is %d\n", ha->control_qp_handle);
+
+       hb_msg = kmalloc(sizeof(struct heart_beat_msg), GFP_ATOMIC);
+       if (!hb_msg) {
+               dprintk(TRC_IB_ERRS, vhba, "heart beat msg is not valid\n");
+               return 1;
+       }
+
+       hb_msg->type = VHBA_HEART_BEAT;
+       hb_msg->rsvd = 0;
+
+       dprintk(TRC_IB, vhba,
+               "sending hear beat msg on handle %d\n", ha->control_qp_handle);
+
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               dprintk(TRC_IB, vhba, "cqp hdl %d hb_msg ptr %p\n",
+                       ha->control_qp_handle, hb_msg);
+               ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+                                      (u8 *) hb_msg,
+                                      sizeof(struct heart_beat_msg),
+                                      XSCORE_DEFER_PROCESS);
+       }
+
+       if (ret) {
+               ha->stats.ib_stats.cqp_send_err_cnt++;
+               dprintk(TRC_IB_ERRS, vhba, "heart beat msg failed\n");
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               kfree(hb_msg);
+       }
+
+       dprintk(TRC_FUNCS, vhba, "Returning...\n");
+       return 0;
+}
+
+int vhba_send_enable_vhba(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct enable_msg *enable_msg;
+       int ret = 1;
+
+       enable_msg = kmalloc(sizeof(struct enable_msg), GFP_ATOMIC);
+       if (!enable_msg) {
+               dprintk(TRC_IB_ERRS, vhba, "enable_msg malloc error\n");
+               return 1;
+       }
+
+       memset(enable_msg, 0, sizeof(struct enable_msg));
+
+       enable_msg->type = ENABLE_VHBA_Q;
+       enable_msg->rsvd = 0;
+
+       dprintk(TRC_INFO, vhba, "sending enable vhba msg on Control Q Pair\n");
+
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+                                      (u8 *) enable_msg,
+                                      sizeof(struct enable_msg),
+                                      XSCORE_DEFER_PROCESS);
+       }
+       if (ret) {
+               ha->stats.ib_stats.cqp_send_err_cnt++;
+               eprintk(vhba, "Error - xscore_post_send() failed\n");
+               ib_link_down(ha);
+               kfree(enable_msg);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+       }
+       ha->stats.fc_stats.enable_msg_cnt++;
+       return 0;
+}
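+
+/*
+ * Buffer ownership note: the connections are created with
+ * XSCORE_DONT_FREE_SENDBUF, so the control messages allocated above
+ * (heart beat, enable) are freed either on the error paths here or, on
+ * success, by vhba_ctrl_send_comp_handler() when the send completes.
+ */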
+
+void vhba_data_send_comp_handler(void *client_arg, void *msg, int status, int n)
+{
+       u32 idr = (u32) (unsigned long)client_arg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(idr);
+       if (!vhba) {
+               eprintk(NULL, "Invalid client_arg received\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       if (status) {
+               eprintk(vhba, "Data Send Completion error: status %d\n",
+                       status);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+       }
+       DEC_REF_CNT(vhba);
+}
+
+void vhba_ctrl_send_comp_handler(void *client_arg, void *msg, int status, int n)
+{
+       u32 idr = (u32) (unsigned long)client_arg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(idr);
+       if (!vhba) {
+               eprintk(NULL, "Invalid client_arg received\n");
+               return;
+       }
+       ha = vhba->ha;
+
+       if (status) {
+               eprintk(vhba, "Ctrl Send Completion error: status %d\n",
+                       status);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               goto out;
+       }
+       kfree(msg);
+out:
+       DEC_REF_CNT(vhba);
+}
+
+void vhba_cqp_recv_comp_handler(void *client_arg, void *msg, int sz,
+                               int status, int n)
+{
+       u32 idr = (u32) (unsigned long)client_arg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(idr);
+       if (!vhba) {
+               eprintk(NULL, "Invalid client_arg received\n");
+               kfree(msg);
+               return;
+       }
+       ha = vhba->ha;
+
+       if (status) {
+               eprintk(vhba, "CQP Recv Completion error: status %d\n", status);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               goto out;
+       }
+       process_cqp_msg(vhba, msg, sz);
+out:
+       kfree(msg);
+       DEC_REF_CNT(vhba);
+}
+
+/*
+ * Called from interrupt context
+ */
+
+void vhba_recv_comp_handler(void *client_arg, void *msg, int sz,
+                           int status, int n)
+{
+       u32 idr = (u32) (unsigned long)client_arg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(idr);
+       if (!vhba) {
+               eprintk(NULL, "Invalid client_arg received\n");
+               kfree(msg);
+               return;
+       }
+       ha = vhba->ha;
+
+       if (status) {
+               eprintk(vhba, "Recv Completion error: status %d\n", status);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               kfree(msg);
+               DEC_REF_CNT(vhba);
+               return;
+       }
+       process_dqp_msg(vhba, msg, sz);
+
+       kfree(msg);
+       DEC_REF_CNT(vhba);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_ib.h
new file mode 100644 (file)
index 0000000..0574522
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_IB_H__
+#define __VHBA_IB_H__
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_fmr_pool.h>
+#include "vhba_os_def.h"
+
+/* Control queue pair defines*/
+#define VHBA_CQP_SEND_Q_SZ             64
+#define VHBA_CQP_RECV_Q_SZ             64
+#define VHBA_CQP_MAX_BUF_SIZE          1024
+#define VHBA_CQP_MAX_CTRL_MSG_SIZE     1024
+
+/* Data queue pair defines */
+#define VHBA_DQP_SEND_Q_SZ             1400
+#define VHBA_DQP_RECV_Q_SZ             1400
+#define VHBA_DQP_MAX_BUF_SIZE          256
+#define VHBA_DQP_MAX_CTRL_MSG_SIZE     256
+
+/* VHBA QP States */
+#define VHBA_QP_NOTCONNECTED           0
+#define VHBA_QP_TRYCONNECTING          1
+#define VHBA_QP_RECONNECTING           2
+#define VHBA_QP_PARTIAL_CONNECT                3
+#define VHBA_QP_CONNECTED              4
+#define VHBA_QP_DISCONNECTED           5
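+
+/*
+ * Typical progression, as driven by vhba_ib.c: NOTCONNECTED ->
+ * RECONNECTING -> PARTIAL_CONNECT (first QP up) -> CONNECTED (both QPs
+ * up); any disconnect or send error drops the pair back to
+ * TRYCONNECTING.
+ */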
+
+#define VHBA_IB_UP                     0
+#define VHBA_IB_DOWN                   1
+#define VHBA_IB_DEAD                   2
+
+/* Queue pair type */
+#define QP_TYPE_CONTROL                        0
+#define QP_TYPE_DATA                   1
+
+/* Data queue pair direction */
+#define DATA_QP_TYPE_TX                        1
+#define DATA_QP_TYPE_RX                        2
+
+/* FMR defines */
+#define VHBA_FMR_POOL_SIZE             256
+#define VHBA_MAX_TRANSFER_SIZE         4080
+#define VHBA_DEFAULT_TRANSFER_SIZE     512
+#define VHBA_MAX_FMR_PAGES             (((VHBA_DEFAULT_TRANSFER_SIZE * 512)/ \
+                                                       (PAGE_SIZE)) + 2)
+#define VHBA_FMR_DIRTY_MARK            32
+#define VHBA_MAX_DSDS_IN_FMR           ((VHBA_DEFAULT_TRANSFER_SIZE * 512)/ \
+                                                               (PAGE_SIZE))
+
+#define TCA_SERVICE_ID 0x1001ULL
+
+struct scsi_xg_vhba_host;
+struct srb;
+
+int vhba_init_rings(struct virtual_hba *vhba);
+void process_cqp_msg(struct virtual_hba *vhba, u8 *msg, int length);
+void process_dqp_msg(struct virtual_hba *vhba, u8 *msg, int length);
+int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id, int notifycmd);
+
+void vhba_control_callback(void *client_arg, int event);
+void vhba_data_callback(void *client_arg, int event);
+
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba);
+int vhba_ib_connect_qp(struct virtual_hba *vhba);
+int vhba_conn_init(struct virtual_hba *vhba);
+void vhba_unmap_buf_fmr(struct virtual_hba *vhba, struct srb *sp, int tot_dsds);
+void sp_put(struct virtual_hba *vhba, struct srb *sp);
+int vhba_map_buf_fmr(struct virtual_hba *vhba, u64 *phys_addr, int num_pgs,
+                    u64 *mapped_fmr_iova, struct srb *sp, int index);
+int vhba_send_write_index(struct virtual_hba *vhba);
+int readjust_io_addr(struct srb *sp);
+int vhba_alloc_fmr_pool(struct virtual_hba *vhba);
+void vhba_dealloc_fmr_pool(struct virtual_hba *vhba);
+extern void vhba_ctrl_send_comp_handler(void *client_arg, void *msg, int status,
+                       int n);
+extern void vhba_recv_comp_handler(void *client_arg, void *msg, int sz,
+                       int status, int n);
+extern void vhba_cqp_recv_comp_handler(void *client_arg, void *msg, int sz,
+                       int status, int n);
+
+#endif /* __VHBA_IB_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_init.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_init.c
new file mode 100644 (file)
index 0000000..2ae9a80
--- /dev/null
@@ -0,0 +1,2966 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * The VHBA driver is the interface driver for the Xsigo virtual HBA (VHBA)
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+
+#include <scsi/scsi.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_transport_fc.h>
+#include <rdma/ib_verbs.h>
+
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_align.h"
+#include "vhba_scsi_intf.h"
+
+#include "xs_compat.h"
+
+static u32 vhba_target_bind(struct virtual_hba *vhba, u32 loop_id,
+                           u8 *nwwn, u8 *pwwn, u32 port_id, s32 bound_value,
+                           u32 lun_count, u8 *lun_map, u16 *lun_id,
+                           u8 media_type);
+static u32 vhba_map_unbound_targets(struct virtual_hba *vhba);
+static struct os_tgt *vhba_tgt_alloc(struct virtual_hba *vhba, u32 tgt);
+static void process_status_cont_entry(struct virtual_hba *vhba,
+                                     struct sts_cont_entry *pkt);
+
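+/* Per-command timeout, presumably in seconds (armed via the srb timer) */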
+#define VHBA_CMD_TIMEOUT 18
+
+static inline void add_to_disc_ports(struct fc_port *fcport,
+                                    struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ha->list_lock, flags);
+       list_add_tail(&fcport->list, &ha->disc_ports);
+       spin_unlock_irqrestore(&ha->list_lock, flags);
+}
+
+int vhba_initialize(struct virtual_hba *vhba, struct vhba_xsmp_msg *msg)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int rval = 0;
+
+       ha->flags.online = 0;
+       ha->device_flags = 0;
+
+       rval = vhba_alloc_fmr_pool(vhba);
+       if (rval) {
+               eprintk(vhba, "Trouble allocating FMR pool.\n"
+                       " Returning %d\n", rval);
+               return -1;
+       }
+
+       /* Initialize VHBA request, IB queues, etc */
+       rval = vhba_init_rings(vhba);
+       if (rval) {
+               eprintk(vhba, "Trouble initializing rings.\n"
+                       " Returning %d\n", rval);
+               vhba_dealloc_fmr_pool(vhba);
+       }
+       return rval;
+}
+
+int vhba_init_rings(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int i;
+
+       for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++)
+               ha->outstanding_cmds[i] = NULL;
+
+       ha->current_outstanding_cmd = 0;
+
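+       /*
+        * Reset the request ring to empty. req_ring_windex is the local
+        * producer index; *req_ring_rindex is a shared consumer index,
+        * presumably advanced by the IOP (see vhba_send_write_index()).
+        */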
+       ha->request_ring_ptr = ha->request_ring;
+       *ha->req_ring_rindex = 0;
+       ha->req_ring_windex = 0;
+       ha->req_q_cnt = ha->request_q_length;
+
+       return 0;
+}
+
+void complete_cmd_and_callback(struct virtual_hba *vhba, struct srb *sp,
+                              struct scsi_cmnd *cp)
+{
+       int sg_count;
+       u32 request_bufflen;
+       struct scatterlist *request_buffer;
+
+       /*
+        * Grab the outstanding command, make the callback,
+        * and pass along the status.
+        */
+       if (sp && cp) {
+               if (sp->cmd != NULL) {
+                       sg_count = scsi_sg_count(sp->cmd);
+                       request_buffer = scsi_sglist(sp->cmd);
+                       request_bufflen = scsi_bufflen(sp->cmd);
+
+                       if (sp->flags & SRB_DMA_VALID) {
+                               sp->flags &= ~SRB_DMA_VALID;
+                               /* Unmap the memory used for this I/O */
+                               if (sg_count) {
+                                       ib_dma_unmap_sg(vhba->
+                                                       xsmp_info.ib_device,
+                                                       request_buffer,
+                                                       sg_count,
+                                                       sp->
+                                                       cmd->sc_data_direction);
+
+                                       vhba_unmap_buf_fmr(vhba,
+                                                          sp, sp->tot_dsds);
+
+                               } else if (request_bufflen) {
+                                       ib_dma_unmap_single(vhba->xsmp_info.
+                                               ib_device, sp->dma_handle,
+                                               request_bufflen,
+                                               sp->cmd->sc_data_direction);
+
+                                       vhba_unmap_buf_fmr(vhba, sp,
+                                                          sp->tot_dsds);
+                                       if (sp->unaligned_sg)
+                                               vhba_tear_bounce_buffer(sp);
+                               }
+                       }
+               } else
+                       dprintk(TRC_ERRORS, vhba, "sp cmd null\n");
+               sp_put(vhba, sp);
+       }
+}
+
+int vhba_purge_pending_ios(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct srb *sp;
+       struct scsi_cmnd *cp;
+       int i, queue_num;
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+       for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+               if (ha->outstanding_cmds[i]) {
+                       sp = ha->outstanding_cmds[i];
+                       cp = sp->cmd;
+                       cp->result = DID_NO_CONNECT << 16;
+                       /* Delete SCSI timer */
+                       if (sp->timer.function != NULL) {
+                               del_timer(&sp->timer);
+                               sp->timer.function = NULL;
+                               sp->timer.data = (unsigned long)NULL;
+                       }
+                       ha->outstanding_cmds[i] = NULL;
+                       CMD_SP(sp->cmd) = NULL;
+                       /* Capture queue_num before sp_put() can free sp */
+                       queue_num = sp->queue_num;
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+                       complete_cmd_and_callback(vhba, sp, cp);
+                       DEC_REF_CNT(vhba);
+                       spin_lock_irqsave(&ha->io_lock, flags);
+
+                       dprintk(TRC_SCSI, vhba,
+                               "dec q cnt for vhba %p q %d\n",
+                               vhba, queue_num);
+                       if (atomic_read
+                           (&ha->stats.io_stats.num_vh_q_reqs[queue_num]) != 0)
+                               atomic_dec(&ha->stats.io_stats.
+                                          num_vh_q_reqs[queue_num]);
+               }
+       }
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+       return 0;
+}
+
+void vhba_taskmgmt_flush_ios(struct virtual_hba *vhba, int tgt_id, int lun,
+                            int lun_reset_flag)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct srb *sp;
+       struct scsi_cmnd *cp;
+       int i, queue_num;
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+       for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+               if (ha->outstanding_cmds[i]) {
+                       sp = ha->outstanding_cmds[i];
+                       cp = sp->cmd;
+                       if ((lun_reset_flag && (cp->device->id == tgt_id) &&
+                            (cp->device->lun == lun)) ||
+                           ((lun_reset_flag == 0) &&
+                            (cp->device->id == tgt_id))) {
+
+                               cp->result = DID_NO_CONNECT << 16;
+                               if (sp->timer.function != NULL) {
+                                       del_timer(&sp->timer);
+                                       sp->timer.function = NULL;
+                                       sp->timer.data = (unsigned long)NULL;
+                               }
+                               ha->outstanding_cmds[i] = NULL;
+                               CMD_SP(sp->cmd) = NULL;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+
+                               complete_cmd_and_callback(vhba, sp, cp);
+                               DEC_REF_CNT(vhba);
+
+                               spin_lock_irqsave(&ha->io_lock, flags);
+
+                               queue_num = sp->queue_num;
+
+                               dprintk(TRC_SCSI, vhba,
+                                       "dec q cnt for vhba %p q %d\n",
+                                       vhba, queue_num);
+                               if (atomic_read
+                                   (&ha->stats.io_stats.
+                                    num_vh_q_reqs[queue_num]) != 0)
+                                       atomic_dec(&ha->stats.io_stats.
+                                                  num_vh_q_reqs[queue_num]);
+                       }
+               }
+       }
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+}
+
+void process_status_entry(struct virtual_hba *vhba, struct sts_entry_24xx *sts)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct srb *sp;
+       struct scsi_cmnd *cp;
+       struct os_tgt *tq;
+       unsigned long flags;
+       u8 *rsp_info, *sense_data;
+       u8 *cdb_ptr, *byte_ptr;
+       u8 lscsi_status;
+       u16 comp_status, scsi_status;
+       s32 resid;
+       u32 sense_len, rsp_info_len, resid_len;
+       u32 queue_num;
+       u32 request_bufflen;
+
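+       /*
+        * The IOCB proper starts 8 bytes into the received message;
+        * process_dqp_msg() likewise reads the entry type at msg + 8.
+        */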
+       byte_ptr = (u8 *) sts;
+       byte_ptr = byte_ptr + 8;
+       sts = (struct sts_entry_24xx *)byte_ptr;
+       cdb_ptr = byte_ptr;
+
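+       /* Only the low 10 bits of the handle index outstanding_cmds[] */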
+       sts->handle &= 0x000003ff;
+       comp_status = le16_to_cpu(sts->comp_status);
+       scsi_status = le16_to_cpu(sts->scsi_status) & SS_MASK;
+
+       ha->stats.io_stats.total_io_rsp++;
+
+       dprintk(TRC_IO, vhba, "comp status %x scsi_status %x handle %x\n",
+               (int)le16_to_cpu(sts->comp_status),
+               (int)le16_to_cpu(sts->scsi_status),
+               (int)le16_to_cpu(sts->handle));
+
+       if (sts->handle < MAX_OUTSTANDING_COMMANDS) {
+
+               spin_lock_irqsave(&ha->io_lock, flags);
+               sp = ha->outstanding_cmds[sts->handle];
+
+               if (sp) {
+                       queue_num = sp->queue_num;
+
+                       atomic_dec(&ha->stats.
+                                  io_stats.num_vh_q_reqs[queue_num]);
+
+                       if (sp->state == VHBA_IO_STATE_ABORTING) {
+                               dprintk(TRC_INFO, vhba,
+                                       "Aborting IO: sp:0x%p, sp->cmd:0x%p\n",
+                                       sp, sp->cmd);
+
+                               dprintk(TRC_ERR_RECOV, vhba,
+                                       "scsi_status= 0x%x\n",
+                                       (int)le16_to_cpu(sts->scsi_status));
+
+                               sp->state = VHBA_IO_STATE_ABORTED;
+                               sp->abort_cnt = 0;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               return;
+                       }
+                       if (sp->state == VHBA_IO_STATE_ABORT_FAILED) {
+                               sp->state = VHBA_IO_STATE_ABORT_NEEDED;
+                               sp->abort_cnt = 0;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               return;
+                       }
+
+                       ha->outstanding_cmds[sts->handle] = NULL;
+                       CMD_SP(sp->cmd) = NULL;
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+
+               } else {
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+                       dprintk(TRC_SCSI_ERRS, vhba, "sp is null for hndl %d\n",
+                               (int)sts->handle);
+               }
+
+       } else if (sts->handle == MAX_OUTSTANDING_COMMANDS) {
+               /*
+                * This indicates completion of a task mgmt command.
+                * No corresponding sp to worry about.
+                */
+               dprintk(TRC_ERRORS, vhba,
+                       "Task mgmt completion: hndl is 1024, no sp\n");
+               return;
+       } else
+               sp = NULL;
+
+       if (sp == NULL) {
+               dprintk(TRC_SCSI_ERRS, vhba, "sp is null. sts_handle= %u\n"
+                       " curr hndl = %u\n", (u32) sts->handle,
+                       (u32) ha->current_outstanding_cmd);
+               /* Reset this adapter or I/O card, etc */
+               return;
+       }
+
+       cp = sp->cmd;
+       if (cp == NULL) {
+               dprintk(TRC_ERRORS, vhba, "cmd already returned to OS\n"
+                       " hndl %u sp %p sp->state %d\n",
+                       (u32)sts->handle, sp, sp->state);
+               return;
+       }
+       /*
+        * While an abort is in progress the sp is being searched, so we
+        * can't change it. Quietly hold this response to be processed
+        * once the sp search is over.
+        */
+       if (sp->state == 1) {
+               dprintk(TRC_ERRORS, vhba, "Command already aborted\n");
+               return;
+       }
+       request_bufflen = scsi_bufflen(sp->cmd);
+
+       /* Delete SCSI timer */
+       if (sp->timer.function != NULL) {
+               del_timer(&sp->timer);
+               sp->timer.function = NULL;
+               sp->timer.data = (unsigned long)NULL;
+       }
+
+       if (sts->entry_type == COMMAND_TYPE_7) {
+               dprintk(TRC_ERRORS, vhba,
+                       "Received type 7 iocb back from QL\n");
+               cp->result = DID_NO_CONNECT << 16;
+               complete_cmd_and_callback(vhba, sp, cp);
+               DEC_REF_CNT(vhba);
+               return;
+       }
+
+       /* Decrement actthreads if used */
+
+       lscsi_status = scsi_status & STATUS_MASK;
+
+       CMD_ENTRY_STATUS(cp) = sts->entry_status;
+       CMD_COMPL_STATUS(cp) = comp_status;
+       CMD_SCSI_STATUS(cp) = scsi_status;
+
+       sense_len = rsp_info_len = resid_len = 0;
+
+       sense_len = le32_to_cpu(sts->sense_len);
+       rsp_info_len = le32_to_cpu(sts->rsp_data_len);
+       resid_len = le32_to_cpu(sts->rsp_residual_count);
+       rsp_info = sts->data;
+       sense_data = sts->data;
+       host_to_fcp_swap(sts->data, sizeof(sts->data));
+
+       /* Check for any FCP transport errors. */
+       if (scsi_status & SS_RESPONSE_INFO_LEN_VALID) {
+               sense_data += rsp_info_len;
+               if (rsp_info_len > 3 && rsp_info[3]) {
+                       eprintk(vhba,
+                               "scsi(%ld:%d:%d:%d) FCP I/O protocol failure ",
+                               ha->host_no, cp->device->channel,
+                               (int)cp->device->id, (int)cp->device->lun);
+                       eprintk(vhba,
+                               " (%x/%02x%02x%02x%02x%02x%02x%02x%02x)... ",
+                               rsp_info_len, rsp_info[0], rsp_info[1],
+                               rsp_info[2], rsp_info[3], rsp_info[4],
+                               rsp_info[5], rsp_info[6], rsp_info[7]);
+                       eprintk(vhba, "retrying command\n");
+                       cp->result = DID_BUS_BUSY << 16;
+                       complete_cmd_and_callback(vhba, sp, cp);
+                       DEC_REF_CNT(vhba);
+                       return;
+               }
+       } else {
+               rsp_info_len = 0;
+       }
+
+       /* Based on the host and SCSI status, generate the Linux status code */
+       switch (comp_status) {
+       case CS_COMPLETE:
+               if (scsi_status == 0) {
+                       dprintk(TRC_IO, vhba, "hndl %d: sts ok\n",
+                               (int)sts->handle);
+                       cp->result = DID_OK << 16;
+                       break;
+               }
+
+               if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) {
+                       resid = resid_len;
+                       scsi_set_resid(cp, resid);
+                       CMD_RESID_LEN(cp) = resid;
+               }
+
+               cp->result = DID_OK << 16 | lscsi_status;
+               if (lscsi_status == SS_BUSY_CONDITION)
+                       break;
+               if (lscsi_status != SS_CHECK_CONDITION)
+                       break;
+
+               /* Copy Sense Data into sense buffer. */
+               memset(cp->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+
+               if (!(scsi_status & SS_SENSE_LEN_VALID))
+                       break;
+
+               if (sense_len >= sizeof(cp->sense_buffer))
+                       sense_len = sizeof(cp->sense_buffer);
+
+               sp->request_sense_length = sense_len;
+               sp->request_sense_ptr = cp->sense_buffer;
+
+               if (sp->request_sense_length >
+                   (sizeof(sts->data) - rsp_info_len))
+                       sense_len = sizeof(sts->data) - rsp_info_len;
+
+               memcpy(cp->sense_buffer, sense_data, sense_len);
+               CMD_ACTUAL_SNSLEN(cp) = sense_len;
+               sp->request_sense_ptr += sense_len;
+               sp->request_sense_length -= sense_len;
+               if (sp->request_sense_length != 0)
+                       ha->status_srb = sp;
+
+               dprintk(TRC_SCSI_ERRS, vhba, "Check condition Sense data,\n"
+                       "scsi(%ld:%d:%d:%d) scsi_status = %d\n",
+                       (long)ha->host_no, (int)cp->device->channel,
+                       (int)cp->device->id, (int)cp->device->lun, scsi_status);
+
+               break;
+
+       case CS_DATA_UNDERRUN:
+               dprintk(TRC_SCSI, vhba, "UNDERRUN detected\n");
+
+               resid = resid_len;
+               if (scsi_status & SS_RESIDUAL_UNDER) {
+                       scsi_set_resid(cp, resid);
+                       CMD_RESID_LEN(cp) = resid;
+               }
+
+               /*
+                * Check to see if SCSI Status is non zero. If so report SCSI
+                * Status.
+                */
+               if (lscsi_status != 0) {
+                       cp->result = DID_OK << 16 | lscsi_status;
+                       if (lscsi_status == SS_BUSY_CONDITION)
+                               break;
+                       if (lscsi_status != SS_CHECK_CONDITION)
+                               break;
+
+                       /* Copy Sense Data into sense buffer */
+                       memset(cp->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
+
+                       if (!(scsi_status & SS_SENSE_LEN_VALID))
+                               break;
+
+                       if (sense_len >= sizeof(cp->sense_buffer))
+                               sense_len = sizeof(cp->sense_buffer);
+
+                       sp->request_sense_length = sense_len;
+                       sp->request_sense_ptr = cp->sense_buffer;
+
+                       if (sp->request_sense_length >
+                           (sizeof(sts->data) - rsp_info_len))
+                               sense_len = sizeof(sts->data) - rsp_info_len;
+
+                       memcpy(cp->sense_buffer, sense_data, sense_len);
+                       CMD_ACTUAL_SNSLEN(cp) = sense_len;
+
+                       sp->request_sense_ptr += sense_len;
+                       sp->request_sense_length -= sense_len;
+                       if (sp->request_sense_length != 0)
+                               ha->status_srb = sp;
+
+                       dprintk(TRC_SCSI_ERRS, vhba,
+                               "Check condition Sense data, ");
+                       dprintk(TRC_SCSI_ERRS, vhba,
+                               "scsi(%ld:%d:%d:%d) cmd=%p pid=%ld\n",
+                               ha->host_no, cp->device->channel,
+                               (int)cp->device->id, (int)cp->device->lun, cp,
+                               cp->serial_number);
+
+               } else {
+
+                       /*
+                        * If RISC reports underrun and target does not report
+                        * it then we must have a lost frame, so tell upper
+                        * layer to retry it by reporting a bus busy.
+                        */
+                       if (!(scsi_status & SS_RESIDUAL_UNDER)) {
+                               eprintk(vhba, "scsi(%ld:%d:%d:%d) Dropped\n",
+                                       ha->host_no, cp->device->channel,
+                                       (int)cp->device->id,
+                                       (int)cp->device->lun);
+                               eprintk(vhba,
+                                       "frame(s) detected (%x of %d bytes)..",
+                                       resid, (u32)request_bufflen);
+                               eprintk(vhba, "retrying command.\n");
+
+                               cp->result = DID_BUS_BUSY << 16;
+
+                               break;
+                       }
+
+                       /* Handle mid-layer underflow */
+                       if ((unsigned)(request_bufflen - resid) <
+                                                               cp->underflow) {
+                               eprintk(vhba, "scsi(%ld:%d:%d:%d):Mid-layer\n",
+                                       ha->host_no, cp->device->channel,
+                                       (int)cp->device->id,
+                                       (int)cp->device->lun);
+                               eprintk(vhba,
+                                       "underflow detected (%x of %d bytes) ",
+                                       resid, (u32)request_bufflen);
+                               eprintk(vhba, "...returning error status.\n");
+                               cp->result = DID_ERROR << 16;
+                               break;
+                       }
+
+                       cp->result = DID_OK << 16;
+               }
+               break;
+
+       case CS_DATA_OVERRUN:
+
+               eprintk(vhba, "scsi(%ld:%d:%d): OVERRUN status detected\n",
+                       ha->host_no, (int)cp->device->id, (int)cp->device->lun);
+               eprintk(vhba, " 0x%x-0x%x\n", comp_status, scsi_status);
+               dprintk(TRC_SCSI_ERRS, vhba, "CDB: 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+                       cp->cmnd[0], cp->cmnd[1], cp->cmnd[2], cp->cmnd[3],
+                       cp->cmnd[4]);
+               dprintk(TRC_SCSI_ERRS, vhba, " 0x%x\n", cp->cmnd[5]);
+
+               dprintk(TRC_SCSI_ERRS, vhba, "PID=0x%lx req=0x%x xtra=0x%x --",
+                       cp->serial_number, request_bufflen, resid_len);
+               dprintk(TRC_SCSI_ERRS, vhba, "\nreturning DID_ERROR status\n");
+               cp->result = DID_ERROR << 16;
+               break;
+
+       case CS_PORT_LOGGED_OUT:
+       case CS_PORT_CONFIG_CHG:
+       case CS_PORT_BUSY:
+       case CS_INCOMPLETE:
+       case CS_PORT_UNAVAILABLE:
+               /*
+                * If the port is in the Target Down state, return all IOs for
+                * this target with DID_NO_CONNECT; else queue the IOs in the
+                * retry_queue.
+                */
+               tq = TGT_Q(ha, cp->device->id);
+               if (tq) {
+                       dprintk(TRC_INFO, vhba,
+                               "Port Down: Logged Out/Unavailable: ");
+                       dprintk(TRC_INFO, vhba,
+                               "port_id:0x%x, PWWN:%lx comp_status=0x%x\n",
+                               tq->fcport->d_id.b24, (unsigned long)
+                               wwn_to_u64(tq->fcport->port_name), comp_status);
+               }
+               cp->result = DID_BUS_BUSY << 16;
+               break;
+
+       case CS_RESET:
+               dprintk(TRC_INFO, vhba,
+                       "CS_RESET:cp=%p, scsi_status=0x%x\n", cp, scsi_status);
+
+               cp->result = DID_RESET << 16;
+               break;
+
+       case CS_ABORTED:
+               /*
+                * hv2.19.12 - DID_ABORT does not retry the request. If we
+                * aborted this request then report an abort; otherwise it
+                * must be a reset.
+                */
+               dprintk(TRC_INFO, vhba,
+                       "CS_ABORTED, cp=%p, scsi_status=0x%x\n", cp,
+                       scsi_status);
+
+               cp->result = DID_RESET << 16;
+               break;
+
+       case CS_TIMEOUT:
+               cp->result = DID_BUS_BUSY << 16;
+
+               vhba->cs_timeout_count++;
+               dprintk(TRC_INFO, vhba,
+                       "CS_TIMEOUT for cmd=%p, opcode/len/status 0x%x/0x%x/0x%x\n",
+                       cp, cp->cmnd[0], scsi_bufflen(cp), scsi_status);
+               break;
+
+       case CS_QUEUE_FULL:
+               dprintk(TRC_INFO, vhba, "scsi(%ld): QUEUE FULL status\n",
+                        ha->host_no);
+               dprintk(TRC_INFO, vhba, " detected 0x%x-0x%x\n", comp_status,
+                       scsi_status);
+
+               /* SCSI Mid-Layer handles device queue full */
+               cp->result = DID_OK << 16 | lscsi_status;
+               break;
+
+       case CS_DMA:
+               dprintk(TRC_INFO, vhba, "dma error\n");
+               cp->result = DID_NO_CONNECT << 16;
+               break;
+
+       default:
+               eprintk(vhba, "SCSI error with unknown status\n");
+               eprintk(vhba, " 0x%x-0x%x\n", comp_status, scsi_status);
+
+               cp->result = DID_ERROR << 16;
+               break;
+       }
+
+       /* If no continuation status entry is pending */
+       if (ha->status_srb == NULL) {
+               complete_cmd_and_callback(vhba, sp, cp);
+               DEC_REF_CNT(vhba);
+       } else {
+               struct sts_cont_entry *ptr;
+
+               if (sts->entry_count > 1) {
+                       dprintk(TRC_SCSI_ERRS, vhba, "non null sts srb!\n");
+                       ptr = (struct sts_cont_entry *)(byte_ptr +
+                                                       sizeof(struct
+                                                              sts_entry_24xx));
+                       process_status_cont_entry(vhba, ptr);
+               } else {
+                       sp->request_sense_length = 0;
+                       complete_cmd_and_callback(vhba, sp, cp);
+                       DEC_REF_CNT(vhba);
+               }
+       }
+}
+
+static
+void process_status_cont_entry(struct virtual_hba *vhba,
+                              struct sts_cont_entry *pkt)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct srb *sp = ha->status_srb;
+       struct scsi_cmnd *cp;
+       u8 sense_sz;
+
+       if (sp != NULL) {
+               cp = sp->cmd;
+               if (cp == NULL) {
+                       eprintk(vhba,
+                               "Cmd already returned to OS, sp %p sp->state %d\n",
+                               sp, sp->state);
+                       ha->status_srb = NULL;
+                       return;
+               }
+
+               if (sp->request_sense_length != 0) {
+                       if (sp->request_sense_length > sizeof(pkt->data))
+                               sense_sz = sizeof(pkt->data);
+                       else
+                               sense_sz = sp->request_sense_length;
+
+                       host_to_fcp_swap(pkt->data, sizeof(pkt->data));
+
+                       dprintk(TRC_IO, vhba, "memcpy of %d bytes\n", sense_sz);
+                       memcpy(sp->request_sense_ptr, pkt->data, sense_sz);
+
+                       ha->status_srb = NULL;
+               }
+               complete_cmd_and_callback(vhba, sp, cp);
+               DEC_REF_CNT(vhba);
+       }
+}
+
+void process_dqp_msg(struct virtual_hba *vhba, u8 *msg, int length)
+{
+       int type;
+       struct abort_entry_24xx *abt;
+
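+       /* The entry type sits at offset 8, past the 8-byte message header */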
+       type = *(u8 *) (msg + 8);
+
+       if ((type == STATUS_TYPE) || (type == COMMAND_TYPE_7))
+               process_status_entry(vhba, (struct sts_entry_24xx *)msg);
+       else if (type == STATUS_CONT_TYPE)
+               process_status_cont_entry(vhba, (struct sts_cont_entry *)msg);
+       else if (type == ABORT_IOCB_TYPE) {
+               abt = (struct abort_entry_24xx *)msg;
+               if (abt->nport_handle) {
+                       eprintk(vhba,
+                               "Could not abort the command indexed by handle %d\n",
+                               abt->handle);
+               }
+       } else
+               eprintk(vhba, "Unknown message from VH\n");
+}
+
+int vhba_set_tgt_offline(struct virtual_hba *vhba, struct os_tgt *tq)
+{
+       int tgt = tq->fcport->os_target_id;
+
+       dprintk(TRC_TIMER, vhba, "RSCN: setting tgt %d offline\n", tgt);
+       atomic_set(&tq->fcport->state, FCS_DEVICE_LOST);
+
+       return 0;
+}
+
+int vhba_set_all_tgts_offline(struct virtual_hba *vhba)
+{
+       int tgt;
+       struct os_tgt *tq;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (!tq)
+                       continue;
+               vhba_set_tgt_offline(vhba, tq);
+       }
+       return 0;
+}
+
+int vhba_set_tgt_online(struct virtual_hba *vhba, struct os_tgt *tq)
+{
+       atomic_set(&tq->fcport->state, FCS_ONLINE);
+       set_bit(TQF_ONLINE, &tq->flags);
+       return 0;
+}
+
+static inline struct fc_rport *xg_rport_add(struct fc_port *fcport,
+                                           struct scsi_xg_vhba_host *ha)
+{
+       struct fc_rport_identifiers rport_ids;
+       struct fc_rport *rport;
+
+       rport_ids.node_name = wwn_to_u64(fcport->node_name);
+       rport_ids.port_name = wwn_to_u64(fcport->port_name);
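+       /* Compose the 24-bit FC_ID as domain:area:al_pa,
+        * e.g. 0x01/0x02/0x03 -> 0x010203 */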
+       rport_ids.port_id = fcport->d_id.b.domain << 16 |
+           fcport->d_id.b.area << 8 | fcport->d_id.b.al_pa;
+       rport_ids.roles = FC_PORT_ROLE_FCP_TARGET;      /* Hardcode the role */
+       fcport->rport = rport = fc_remote_port_add(ha->host, 0, &rport_ids);
+       if (!rport) {
+               pr_err("FC remote port add failed\n");
+               return NULL;
+       }
+       pr_info("scsi(%ld:%d)\n",  ha->host_no, fcport->os_target_id);
+       pr_info(" rport_add: PWWN:%lx NWWN:%lx PORT_ID:%x\n",
+               (unsigned long)rport_ids.port_name,
+               (unsigned long)rport_ids.node_name, rport_ids.port_id);
+       rport->supported_classes = fcport->supported_classes;
+       *((struct fc_port **) rport->dd_data) = fcport;
+       fc_remote_port_rolechg(rport, rport_ids.roles);
+       return rport;
+}
+
+void vhba_update_rports(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+       int tgt;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(xwork->idr);
+       if (vhba == NULL) {
+               dprintk(TRC_INFO, NULL,
+                       "Could not find vhba for updating rport\n");
+               goto out;
+       }
+       ha = vhba->ha;
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               struct os_tgt *tq;
+
+               tq = TGT_Q(ha, tgt);
+               if (tq && tq->fcport) {
+                       eprintk(vhba, "rport = %p, state = %d\n",
+                               tq->fcport->rport,
+                               atomic_read(&tq->fcport->state));
+
+                       if (atomic_read(&tq->fcport->state) == FCS_ONLINE) {
+                               /* Check if you've already reported the rport */
+                               if (tq->fcport->rport) {
+                                       continue;
+                               } else {
+                                       eprintk(vhba, "Updating rports\n");
+                                       tq->fcport->rport =
+                                           xg_rport_add(tq->fcport, ha);
+                                       if (!tq->fcport->rport)
+                                               eprintk(ha->vhba,
+                                                       "Error registering scsi(%ld:%d)\n",
+                                                       ha->host_no,
+                                                       tq->fcport->os_target_id);
+                               }
+                       } else {
+                               struct fc_rport *remote_port;
+
+                               if ((tq->fcport->rport) &&
+                                   (atomic_read(&tq->fcport->state)
+                                    == FCS_DEVICE_DEAD)) {
+                                       /* Target dead remove rport from OS */
+                                       eprintk(ha->vhba,
+                                               "removing scsi(%ld:%d) ",
+                                               ha->host_no,
+                                               tq->fcport->os_target_id);
+                                       eprintk(ha->vhba,
+                                               "state: 0x%x\n",
+                                               atomic_read(&tq->
+                                                           fcport->state));
+                                       remote_port = tq->fcport->rport;
+                                       tq->fcport->rport = NULL;
+                                       fc_remote_port_delete(remote_port);
+                               }
+                       }
+               }
+       }
+       /* Set scan_reqd before dropping the vhba reference */
+       vhba->scan_reqd = 1;
+       DEC_REF_CNT(vhba);
+out:
+       kfree(xwork);
+}
+
+void schedule_update_rports(struct scsi_xg_vhba_host *ha)
+{
+       struct xsvhba_work *xwork =
+           kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+
+       if (!xwork) {
+               eprintk(NULL, "Error allocating work\n");
+               return;
+       }
+       xwork->idr = ha->vhba->idr;
+       INIT_WORK(&xwork->work, vhba_update_rports);
+       queue_work(vhba_workqueuep, &xwork->work);
+}
+
+void vhba_handle_scan(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr(xwork->idr);
+       if (vhba == NULL) {
+               dprintk(TRC_INFO, NULL, "Could not find vhba for scan\n");
+               goto out;
+       }
+       ha = vhba->ha;
+
+       if (atomic_read(&vhba->vhba_state) == VHBA_STATE_SCAN) {
+               if (vhba->scanned_once == 0) {
+                       vhba->scanned_once = 1;
+
+               } else {
+                       dprintk(TRC_INFO, vhba,
+                               "(target_count = %d, max_targets = %d)\n",
+                               ha->target_count, ha->max_targets);
+                       if ((ha->target_count > 0) || (ha->max_targets > 0)) {
+                               u32 t_id;
+                               struct os_tgt *tq;
+                               struct scsi_device *device;
+
+                               dprintk(TRC_INFO, vhba,
+                                       "changing to VHBA_STATE_ACTIVE ");
+                               dprintk(TRC_INFO, vhba,
+                                       "since we have targets..\n");
+
+                               for (t_id = 0; t_id < ha->max_targets; t_id++) {
+                                       tq = TGT_Q(ha, t_id);
+                                       if (!tq)
+                                               continue;
+                                       if (atomic_read(&ha->link_state) !=
+                                           LINK_DOWN &&
+                                           atomic_read(&tq->fcport->state) !=
+                                           FCS_DEVICE_LOST) {
+                                               device =
+                                                   scsi_device_lookup(ha->host,
+                                                                      0, t_id,
+                                                                      0);
+
+                                               if (device == NULL)
+                                                       continue;
+                                               if (device->sdev_state ==
+                                                   SDEV_OFFLINE) {
+                                                       device->sdev_state =
+                                                           SDEV_RUNNING;
+                                               }
+                                               scsi_device_put(device);
+                                       }
+                               }
+                       }
+               }
+               atomic_set(&vhba->vhba_state, VHBA_STATE_ACTIVE);
+       }
+
+       /* On the first install the rports must be registered here:
+          TGT update messages do not arrive the first time. */
+       schedule_update_rports(ha);
+
+       DEC_REF_CNT(vhba);
+out:
+       kfree(xwork);
+}
+
+void vhba_handle_targets(struct virtual_hba *vhba,
+                        struct vhba_tgt_status_msg tgt_status_msg, int *found)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       int loop_id = (u32) be16_to_cpu(tgt_status_msg.loop_id);
+       int tgt, k, lun_count;
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (tq && (memcmp(tgt_status_msg.wwpn, tq->fcport->port_name,
+                                 WWN_SIZE) == 0)) {
+                       *found = 1;
+                       if (atomic_read(&tq->fcport->state) != FCS_ONLINE) {
+                               ha->stats.fc_stats.rscn_up_cnt++;
+                               atomic_set(&tq->fcport->state, FCS_ONLINE);
+                               set_bit(TQF_ONLINE, &tq->flags);
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN:Target online");
+                               dprintk(TRC_INFO, vhba,
+                                       " msg received: PWWN: %llx, ",
+                                       wwn_to_u64(tgt_status_msg.wwpn));
+                               dprintk(TRC_INFO, vhba,
+                                       "port_id: 0x%x,loop_id: 0x%x\n",
+                                       be32_to_cpu(tgt_status_msg.port_id),
+                                       loop_id);
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN: old PWWN: %llx, old port_id: ",
+                                       wwn_to_u64(tq->fcport->port_name));
+                               dprintk(TRC_INFO, vhba,
+                                       "0x%x, old loop_id: 0x%x\n",
+                                       tq->fcport->d_id.b24,
+                                       tq->fcport->loop_id);
+                               tq->fcport->loop_id = (u32)
+                                   be16_to_cpu(tgt_status_msg.loop_id);
+                               tq->fcport->d_id.b24 = tq->d_id.b24 =
+                                   be32_to_cpu(tgt_status_msg.port_id);
+                               for (k = 0; k < WWN_SIZE; k++)
+                                       tq->port_name[k] =
+                                           tgt_status_msg.wwpn[k];
+
+                               lun_count = (u32)
+                                   be16_to_cpu(tgt_status_msg.lun_count);
+                               if (lun_count != tq->fcport->lun_count) {
+                                       dprintk(TRC_INFO, vhba,
+                                               "RSCN Target online: lun count is different\n");
+                                       vhba->scan_reqd = 1;
+                               } else {
+                                       for (k = 0; k < lun_count; k++) {
+                                               if (tq->fcport->lun_ids[k] !=
+                                                   tgt_status_msg.lun_ids[k]) {
+                                                       dprintk(TRC_INFO, vhba,
+                                                               "RSCN Target online: lun id different\n");
+                                                       vhba->scan_reqd = 1;
+                                                       break;
+                                               }
+                                       }
+                               }
+                               for (k = 0; k < MAX_FIBRE_LUNS; k++)
+                                       tq->fcport->lun_ids[k] = -1;
+                               for (k = 0; k < lun_count; k++)
+                                       tq->fcport->lun_ids[k] =
+                                           tgt_status_msg.lun_ids[k];
+
+                               dprintk(TRC_INFO, NULL,
+                                       "New Lun_count= %d\n", lun_count);
+                               tq->fcport->lun_count = lun_count;
+                               memcpy(tq->fcport->port_name, tq->port_name,
+                                      WWN_SIZE);
+                               vhba_set_tgt_online(vhba, tq);
+                       } else {
+                               /*
+                                * Already in up state no need to process...
+                                */
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN:Target online");
+                               dprintk(TRC_INFO, vhba,
+                                       " msg received for already enabled");
+                               dprintk(TRC_INFO, vhba,
+                                       " device PWWN: %llx, ",
+                                       wwn_to_u64(tgt_status_msg.wwpn));
+                               dprintk(TRC_INFO, vhba,
+                                       "port_id: 0x%x,loop_id: 0x%x\n",
+                                       be32_to_cpu(tgt_status_msg.port_id),
+                                       loop_id);
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN: old PWWN: %llx, old port_id: ",
+                                       wwn_to_u64(tq->fcport->port_name));
+                               dprintk(TRC_INFO, vhba,
+                                       "0x%x, old loop_id: 0x%x\n",
+                                       tq->fcport->d_id.b24,
+                                       tq->fcport->loop_id);
+
+                               ha->stats.fc_stats.rscn_multiple_up_cnt++;
+                               tq->fcport->loop_id = (u32)
+                                   be16_to_cpu(tgt_status_msg.loop_id);
+                               tq->fcport->d_id.b24 = tq->d_id.b24 =
+                                   be32_to_cpu(tgt_status_msg.port_id);
+                               for (k = 0; k < WWN_SIZE; k++)
+                                       tq->port_name[k] =
+                                           tgt_status_msg.wwpn[k];
+                               lun_count = (u32)
+                                   be16_to_cpu(tgt_status_msg.lun_count);
+
+                               if (lun_count != tq->fcport->lun_count) {
+                                       dprintk(TRC_INFO, vhba,
+                                               "RSCN Target already online: lun count is different\n");
+                                       vhba->scan_reqd = 1;
+                               } else {
+                                       for (k = 0; k < lun_count; k++) {
+                                               if (tq->fcport->lun_ids[k] !=
+                                                   tgt_status_msg.lun_ids[k]) {
+                                                       dprintk(TRC_INFO, vhba,
+                                                               "RSCN Target already online: lun id different\n");
+                                                       vhba->scan_reqd = 1;
+                                                       break;
+                                               }
+                                       }
+                               }
+                               for (k = 0; k < MAX_FIBRE_LUNS; k++)
+                                       tq->fcport->lun_ids[k] = -1;
+                               for (k = 0; k < lun_count; k++) {
+                                       tq->fcport->lun_ids[k] =
+                                           tgt_status_msg.lun_ids[k];
+                                       dprintk(TRC_INFO, NULL,
+                                               "Lun id = %d\n",
+                                               tq->fcport->lun_ids[k]);
+                               }
+                               dprintk(TRC_INFO, NULL,
+                                       "New Lun_count= %d\n", lun_count);
+                               tq->fcport->lun_count = lun_count;
+                               memcpy(tq->fcport->port_name, tq->port_name,
+                                      WWN_SIZE);
+                       }
+               }
+               ha->stats.fc_stats.last_up_tgt = tgt;
+       }
+}
+
+void process_cqp_msg(struct virtual_hba *vhba, u8 *msg, int length)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct vhba_discovery_msg *r_msg;
+       struct vhba_discovery_cont_msg *r_cont_msg;
+       struct vhba_tgt_status_msg tgt_status_msg;
+       struct enable_rsp *enable_rsp;
+       struct os_tgt *tq;
+       struct tgt_info *tgt_msg;
+       struct vhba_link_status *link_status_msg = NULL;
+       struct xsvhba_work *xwork;
+       int type, vp;
+       int i, k, found;
+       int work_submitted = 0;
+
+       u8 port_name[WWN_SIZE];
+       u8 node_name[WWN_SIZE];
+       u32 lun_count;
+       u32 tgt;
+       u32 port_id;
+       u32 loop_id;
+       u32 t_count;
+       s32 bound_value;
+       u8 lun_map[MAX_FIBRE_LUNS >> 3];
+       u16 lun_id[MAX_FIBRE_LUNS];
+       u8 media_type;
+
+       xwork = kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+       if (!xwork) {
+               eprintk(NULL, "vhba_work kmalloc failed\n");
+               return;
+       }
+
+       xwork->idr = vhba->idr;
+
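+       /* The first byte of a CQP message carries the message type */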
+       type = *(u8 *) msg;
+
+       if (type == DISC_INFO_UPDATE) {
+               r_msg = (struct vhba_discovery_msg *)msg;
+               dprintk(TRC_INFO, vhba,
+                       "Got disc info from IOP, length %d\n", length);
+
+               ha->stats.fc_stats.disc_info_cnt++;
+
+               if (be16_to_cpu(r_msg->target_count) == 0) {
+                       dprintk(TRC_INFO, vhba, "zero tgts discovered!\n");
+                       ha->target_count = 0;
+                       ha->max_targets = ha->target_count;
+                       ha->max_cont_segs = be16_to_cpu(r_msg->cont_count);
+                       dprintk(TRC_CQP, vhba,
+                               "Number of continuation segments = %d\n",
+                               ha->max_cont_segs);
+                       kfree(xwork);
+                       return;
+               }
+
+               ha->target_count = be16_to_cpu(r_msg->target_count);
+               t_count = (u32) ha->target_count;
+               dprintk(TRC_INFO, vhba, "Target Count %d\n", t_count);
+
+               ha->max_targets = ha->target_count;
+               ha->max_tgt_id = ha->max_targets;
+
+               ha->max_cont_segs = be16_to_cpu(r_msg->cont_count);
+               k = (int)ha->max_cont_segs;
+               dprintk(TRC_CQP, vhba, "Cont segs %d\n", k);
+
+               tgt_msg = (struct tgt_info *)(r_msg->tgt_data);
+
+               for (i = 0; i < ha->target_count; i++) {
+                       /*
+                        * use fcport from the message
+                        * also get the fclun info
+                        * check for return values...
+                        */
+                       for (k = 0; k < WWN_SIZE; k++)
+                               port_name[k] = tgt_msg[i].wwpn[k];
+
+                       for (k = 0; k < WWN_SIZE; k++)
+                               node_name[k] = tgt_msg[i].wwnn[k];
+
+                       port_id = be32_to_cpu(tgt_msg[i].port_id);
+                       loop_id = (u32) (be16_to_cpu(tgt_msg[i].loop_id));
+                       bound_value =
+                           be32_to_cpu(tgt_msg[i].persistent_binding);
+                       if ((bound_value != -1) &&
+                           (bound_value >= MAX_FIBRE_TARGETS)) {
+                               bound_value = -1;
+                       }
+                       lun_count = (u32) (be16_to_cpu(tgt_msg[i].lun_count));
+
+                       dprintk(TRC_INFO, vhba,
+                               "PWWN: %llx, NWWN: %llx, ",
+                               wwn_to_u64(port_name),
+                                wwn_to_u64(node_name));
+                       dprintk(TRC_INFO, vhba,
+                               "port_id(%x) loop_id(%x)",
+                               (int) port_id, (int)loop_id);
+                       dprintk(TRC_INFO, vhba,
+                               " bound_value(%d) lun_count(%d)\n",
+                               (int)bound_value, (int)lun_count);
+
+                       for (k = 0; k < lun_count; k++) {
+                               lun_id[k] = tgt_msg[i].lun_ids[k];
+                               dprintk(TRC_INFO, vhba,
+                                       "lun id = %d\n", lun_id[k]);
+                       }
+
+                       media_type = tgt_msg[i].media_type;
+
+                       vhba_target_bind(vhba, loop_id, node_name, port_name,
+                                        port_id, bound_value, lun_count,
+                                        lun_map, lun_id, media_type);
+               }
+
+               vhba_set_tgt_count(vhba);
+
+               if (ha->max_cont_segs == 0) {
+
+                       /* Map all unbound fcports to the tgt map */
+                       vhba_map_unbound_targets(vhba);
+
+                       /* Set the loop status to LINK_UP if not already up */
+                       if (atomic_read(&ha->link_state) != LINK_UP)
+                               atomic_set(&ha->link_state, LINK_UP);
+
+                               /* Capture queue_num before sp_put() can free sp */
+                               queue_num = sp->queue_num;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+
+                               complete_cmd_and_callback(vhba, sp, cp);
+                               DEC_REF_CNT(vhba);
+
+                               spin_lock_irqsave(&ha->io_lock, flags);
+                       work_submitted = 1;
+
+               }
+               vhba->scan_reqd = 1;
+
+       } else if (type == DISC_INFO_CONT_UPDATE) {
+               r_cont_msg = (struct vhba_discovery_cont_msg *)msg;
+               dprintk(TRC_INFO, vhba, "Got cont disc info from IOP\n");
+
+               if ((ha->max_cont_segs == 0) &&
+                   (ha->max_cont_segs < r_cont_msg->seg_num)) {
+                       dprintk(TRC_CQP, vhba,
+                               "Max cont segs in the DISC_INFO msg is 0\n");
+                       kfree(xwork);
+                       return;
+               }
+
+               t_count = (u32) be16_to_cpu(r_cont_msg->target_count);
+               dprintk(TRC_INFO, vhba, "Cont Target Count %d\n", t_count);
+               k = (int)be16_to_cpu(r_cont_msg->seg_num);
+
+               if ((ha->target_count + t_count) <= MAX_FIBRE_TARGETS) {
+                       ha->target_count += t_count;
+
+                       tgt_msg = (struct tgt_info *)(r_cont_msg->tgt_data);
+                       for (i = 0; i < t_count; i++) {
+                               /*
+                                * use fcport from the message
+                                * also get the fclun info
+                                * check for return values...
+                                */
+                               for (k = 0; k < WWN_SIZE; k++)
+                                       port_name[k] = tgt_msg[i].wwpn[k];
+
+                               for (k = 0; k < WWN_SIZE; k++)
+                                       node_name[k] = tgt_msg[i].wwnn[k];
+
+                               port_id = be32_to_cpu(tgt_msg[i].port_id);
+                               loop_id = be16_to_cpu(tgt_msg[i].loop_id);
+                               bound_value =
+                                   be32_to_cpu(tgt_msg[i].persistent_binding);
+                               lun_count = be16_to_cpu(tgt_msg[i].lun_count);
+
+                               dprintk(TRC_INFO, vhba,
+                                       "PWWN: %llx, NWWN: %llx, ",
+                                       wwn_to_u64(port_name),
+                                       wwn_to_u64(node_name));
+                               dprintk(TRC_INFO, vhba,
+                                       "port_id(%x) loop_id(%x)",
+                                       (int)port_id, (int)loop_id);
+                               dprintk(TRC_INFO, vhba,
+                                       " bound_value(%d) lun_count(%d)\n",
+                                       (int)bound_value, (int)lun_count);
+
+                               for (k = 0; k < lun_count; k++) {
+                                       lun_id[k] = tgt_msg[i].lun_ids[k];
+                                       dprintk(TRC_INFO, vhba,
+                                               "lun id = %d\n", lun_id[k]);
+                               }
+
+                               media_type = tgt_msg[i].media_type;
+
+                               vhba_target_bind(vhba, loop_id, node_name,
+                                                port_name, port_id,
+                                                bound_value, lun_count,
+                                                lun_map, lun_id, media_type);
+                       }
+               }
+
+               dprintk(TRC_CQP, vhba, "max disc msgs cnt is %d\n",
+                       ha->max_cont_segs);
+               dprintk(TRC_CQP, vhba,
+                       "disc cont update seg num is %d (raw 0x%x)\n",
+                       be16_to_cpu(r_cont_msg->seg_num), r_cont_msg->seg_num);
+
+               /* If last segment processed then start scanning */
+               if (ha->max_cont_segs == r_cont_msg->seg_num) {
+                       vhba_map_unbound_targets(vhba);
+
+                       ha->max_targets = ha->target_count;
+
+                       /* Set the loop status to LINK_UP if already not up */
+                       if (atomic_read(&ha->link_state) != LINK_UP)
+                               atomic_set(&ha->link_state, LINK_UP);
+
+                       dprintk(TRC_INFO, vhba,
+                               "max_tgt_id= %d : max_targets= %d\n",
+                               ha->max_tgt_id, ha->max_targets);
+
+                       /*
+                        * Let the workqueue handle the scsi scan
+                        */
+
+                       atomic_set(&vhba->vhba_state, VHBA_STATE_SCAN);
+                       ha->discs_ready_flag = 1;
+                       INIT_WORK(&xwork->work, vhba_handle_scan);
+                       queue_work(vhba_workqueuep, &xwork->work);
+                       work_submitted = 1;
+               }
+               vhba->scan_reqd = 1;
+       } else if (type == TGT_STATUS_UPDATE) {
+
+               memcpy(&tgt_status_msg, (struct vhba_tgt_status_msg *)msg,
+                      sizeof(struct vhba_tgt_status_msg));
+               dprintk(TRC_INFO, vhba, "Got tgt status update from IOP\n");
+
+               vhba->scan_reqd = 1;
+
+               if (tgt_status_msg.flag == TGT_DEAD) {
+                       loop_id = (u32) be16_to_cpu(tgt_status_msg.loop_id);
+                       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+                               tq = TGT_Q(ha, tgt);
+                               port_id = be32_to_cpu(tgt_status_msg.port_id);
+                               if (tq && (memcmp(tgt_status_msg.wwpn,
+                                                 tq->fcport->port_name,
+                                                 WWN_SIZE) == 0)
+                                   && tq->d_id.b24 == port_id) {
+                                       atomic_set(&tq->fcport->state,
+                                                  FCS_DEVICE_DEAD);
+                                       ha->stats.fc_stats.rscn_dead_cnt++;
+                                       ha->stats.fc_stats.last_dead_tgt = tgt;
+                                       dprintk(TRC_INFO, vhba,
+                                               "RSCN: Target dead msg received: PWWN: %llx, port_id: 0x%x, loop_id: 0x%x\n",
+                                               wwn_to_u64(tgt_status_msg.wwpn),
+                                               be32_to_cpu(tgt_status_msg.port_id),
+                                               loop_id);
+                               }
+                       }
+                       vhba->scan_reqd = 1;
+               } else if (tgt_status_msg.flag == TGT_LOST) {
+                       found = 0;
+                       loop_id = (u32) be16_to_cpu(tgt_status_msg.loop_id);
+                       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+                               tq = TGT_Q(ha, tgt);
+                               if (tq && (memcmp(tgt_status_msg.wwpn,
+                                                 tq->fcport->port_name,
+                                                 WWN_SIZE) == 0)) {
+                                       found = 1;
+                                       if (atomic_read(&tq->fcport->state) !=
+                                           FCS_DEVICE_LOST) {
+                                               dprintk(TRC_INFO, vhba,
+                                                       "RSCN: Target Offline msg received: PWWN:%llx, port_id: 0x%x, loop_id: 0x%x\n",
+                                                       wwn_to_u64(tgt_status_msg.wwpn),
+                                                       be32_to_cpu(tgt_status_msg.port_id),
+                                                       loop_id);
+                                               ha->stats.fc_stats.rscn_dn_cnt++;
+                                               vhba_set_tgt_offline(vhba, tq);
+                                       } else {
+                                               dprintk(TRC_INFO, vhba,
+                                                       "RSCN: Target Offline msg received for already disabled device: PWWN:%llx, port_id: 0x%x, loop_id: 0x%x\n",
+                                                       wwn_to_u64(tgt_status_msg.wwpn),
+                                                       be32_to_cpu(tgt_status_msg.port_id),
+                                                       loop_id);
+                                               ha->stats.fc_stats.rscn_multiple_dn_cnt++;
+                                       }
+                                       ha->stats.fc_stats.last_dn_tgt = tgt;
+                               }
+                       }
+                       if (!found) {
+                               eprintk(vhba,
+                                       "RSCN: No target found for offline msg: port_id: 0x%x, loop_id: 0x%x\n",
+                                       be32_to_cpu(tgt_status_msg.port_id),
+                                       loop_id);
+                       }
+               } else if (tgt_status_msg.flag == TGT_FOUND) {
+
+                       if (atomic_read(&ha->link_state) != LINK_UP) {
+                               ha->stats.fc_stats.link_up_cnt++;
+                               atomic_set(&ha->link_state, LINK_UP);
+                       }
+                       found = 0;
+                       vhba_handle_targets(vhba, tgt_status_msg, &found);
+                       if (!found) {
+                               /* Brand new target discovered. process it */
+                               loop_id =
+                                   (u32) be16_to_cpu(tgt_status_msg.loop_id);
+                               port_id = be32_to_cpu(tgt_status_msg.port_id);
+                               if (tgt_status_msg.persistent_binding != -1) {
+                                       bound_value =
+                                           be32_to_cpu
+                                           (tgt_status_msg.persistent_binding);
+                                       ha->stats.fc_stats.last_up_tgt =
+                                           bound_value;
+                               } else {
+                                       bound_value = -1;
+                               }
+
+                               if (bound_value >= MAX_TARGETS) {
+                                       eprintk(vhba,
+                                               "bound value exceeds limits\n");
+                                       bound_value = -1;
+                               }
+
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN: Target online msg received for new device: PWWN:%llx, port_id: 0x%x, loop_id: 0x%x binding: %d\n",
+                                       wwn_to_u64(tgt_status_msg.wwpn),
+                                       be32_to_cpu(tgt_status_msg.port_id),
+                                       loop_id, (int)bound_value);
+                               dprintk(TRC_INFO, vhba,
+                                       "RSCN: Curr tgt_cnt: 0x%x max_tgt_id 0x%x, max_tgts 0x%x\n",
+                                       ha->target_count, ha->max_tgt_id,
+                                       ha->max_targets);
+                               memcpy(port_name, tgt_status_msg.wwpn,
+                                      WWN_SIZE);
+                               memcpy(node_name, tgt_status_msg.wwnn,
+                                      WWN_SIZE);
+                               lun_count =
+                                   (u32) (be16_to_cpu
+                                          (tgt_status_msg.lun_count));
+                               for (k = 0; k < lun_count; k++)
+                                       lun_id[k] = tgt_status_msg.lun_ids[k];
+
+                               media_type = tgt_status_msg.media_type;
+
+                               vhba_target_bind(vhba, loop_id, node_name,
+                                                port_name, port_id,
+                                                bound_value, lun_count,
+                                                lun_map, lun_id, media_type);
+                               vhba_map_unbound_targets(vhba);
+                               if (bound_value == -1)
+                                       ha->stats.fc_stats.last_up_tgt =
+                                           ha->max_tgt_id;
+                               if (vhba->scanned_once == 0) {
+                                       /*
+                                        * Let the workqueue handle the
+                                        * scsi scan
+                                        */
+                                       atomic_set(&vhba->vhba_state,
+                                                  VHBA_STATE_SCAN);
+                                       INIT_WORK(&xwork->work,
+                                                 vhba_handle_scan);
+                                       queue_work(vhba_workqueuep,
+                                                  &xwork->work);
+                                       work_submitted = 1;
+                               } else {
+                                       /*for new device */
+                                       vhba->scan_reqd = 1;
+                               }
+                       } else {
+                               vhba_set_tgt_count(vhba);
+                               atomic_set(&vhba->vhba_state,
+                                          VHBA_STATE_ACTIVE);
+                       }
+               }
+
+               schedule_update_rports(ha);
+
+       } else if (type == ENABLE_RSP) {
+               enable_rsp = (struct enable_rsp *)msg;
+               ha->stats.fc_stats.enable_resp_cnt++;
+               vp = (int)enable_rsp->vp_index;
+               dprintk(TRC_INFO, vhba,
+                       "Got enable rsp: vp_index %d, res_id %llx for ha\n",
+                       vp, enable_rsp->resource_id);
+
+               if (vhba->cfg && vhba->ha &&
+                   (vhba->resource_id == enable_rsp->resource_id)) {
+                       dprintk(TRC_INFO, vhba,
+                               "Setting vp_index %d for ha\n", vp);
+                       vhba->ha->vp_index = enable_rsp->vp_index;
+               }
+       } else if (type == PLINK_STATUS_UPDATE) {
+               dprintk(TRC_CQP, vhba, "got plink status update\n");
+               link_status_msg = (struct vhba_link_status *)msg;
+               if (link_status_msg->phy_link_status == LINK_DOWN) {
+                       dprintk(TRC_INFO, vhba,
+                               "received link down msg from iop\n");
+                       ha->stats.fc_stats.link_dn_cnt++;
+                       if (atomic_read(&ha->link_state) == LINK_UP) {
+                               atomic_set(&ha->link_state, LINK_DOWN);
+                               vhba_set_all_tgts_offline(vhba);
+                       } else {
+                               dprintk(TRC_INFO, vhba,
+                                       "vhba already in link down state\n");
+                       }
+               } else if (link_status_msg->phy_link_status == LINK_DEAD) {
+                       atomic_set(&ha->link_state, LINK_DEAD);
+                       ha->stats.fc_stats.link_dead_cnt++;
+                       dprintk(TRC_INFO, vhba, "vhba link dead state\n");
+               } else {
+                       ha->stats.fc_stats.link_up_cnt++;
+               }
+
+       } else {
+               eprintk(vhba, "Unknown msg from IOP\n");
+       }
+       /*
+        * Not every path above queues the work item; on error paths and in
+        * states where no scan is started, free it here.
+        */
+       if (!work_submitted)
+               kfree(xwork);
+}
+
+static inline struct cont_a64_entry *
+vhba_prep_cont_type1_iocb(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct cont_a64_entry *cont_pkt;
+
+       if (!ha) {
+               eprintk(NULL, "null ha context\n");
+               return NULL;
+       }
+
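+       /*
+        * The request ring is a fixed-size circular buffer shared with the
+        * IOP; wrap the write index once it reaches request_q_length.
+        */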
+       /* Adjust ring index. */
+       ha->req_ring_windex++;
+       if (ha->req_ring_windex == ha->request_q_length) {
+               ha->req_ring_windex = 0;
+               ha->request_ring_ptr = ha->request_ring;
+       } else
+               ha->request_ring_ptr++;
+
+       cont_pkt = (struct cont_a64_entry *)ha->request_ring_ptr;
+
+       /* Load packet defaults. */
+       cont_pkt->entry_type = CONTINUE_A64_TYPE;
+
+       return cont_pkt;
+}
+
+static inline void
+vhba_build_scsi_iocbs(struct srb *sp, struct cmd_type_7 *cmd_pkt, u16 tot_dsds)
+{
+       struct scsi_xg_vhba_host *ha = sp->ha;
+       struct virtual_hba *vhba = ha->vhba;
+       struct scsi_cmnd *cmd;
+       u16 avail_dsds;
+       u32 *cur_dsd;
+       u32 *rkey;
+       u32 rindex;
+       u32 sp_index;
+       u64 *page_list = NULL;
+       u64 mapped_addr;
+       u32 *cur_dsd_len;
+       int unaligned_io = 0;
+       int ret;
+       u32 request_bufflen = scsi_bufflen(sp->cmd);
+
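+       /*
+        * fmr_page_mask rounds DMA addresses down to a PAGE_SIZE boundary;
+        * the FMR page lists built below hold page-aligned chunks.
+        */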
+       u64 fmr_page_mask = ~((u64) PAGE_SIZE - 1);
+
+       cmd = sp->cmd;
+
+       /* Update entry type to indicate Command Type 7 IOCB */
+       cmd_pkt->entry_type = COMMAND_TYPE_7;
+
+       /* No data transfer */
+       if (request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
+               cmd_pkt->byte_count = cpu_to_le32(0);
+               sp->ha->stats.io_stats.total_task_mgmt_reqs++;
+               dprintk(TRC_SCSI_ERRS, vhba, "Task Mgmt Req. Returning\n");
+               return;
+       }
+
+       /* Set transfer direction */
+       if (cmd->sc_data_direction == DMA_TO_DEVICE) {
+               cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_WRITE_DATA);
+               ha->stats.io_stats.total_write_reqs++;
+               ha->stats.io_stats.total_write_mbytes +=
+                   le32_to_cpu(cmd_pkt->byte_count);
+       } else if (cmd->sc_data_direction == DMA_FROM_DEVICE) {
+               cmd_pkt->task_mgmt_flags = cpu_to_le16(TMF_READ_DATA);
+               ha->stats.io_stats.total_read_reqs++;
+               ha->stats.io_stats.total_read_mbytes +=
+                   le32_to_cpu(cmd_pkt->byte_count);
+       }
+
+       /* One DSD is available in the Command Type 7 IOCB */
+       cmd_pkt->rkey1 = 0;
+       cmd_pkt->rkey2 = 0;
+       cmd_pkt->rkey3 = 0;
+       cmd_pkt->rkey4 = 0;
+       cmd_pkt->rkey5 = 0;
+
+       avail_dsds = 1;
+       cur_dsd = (u32 *) &(cmd_pkt->dseg_0_address);
+       cur_dsd_len = (u32 *) &(cmd_pkt->dseg_0_len);
+       rkey = (u32 *) &(cmd_pkt->rkey1);
+       rindex = 0;
+       sp_index = 0;
+       sp->tot_dsds = tot_dsds;
+
+       /* Load data segments */
+       if (scsi_sg_count(cmd) != 0) {
+               struct scatterlist *cur_seg;
+               int mapped_len = 0;
+               int remaining_length = 0;
+               int first_pg_offset = 0;
+               int cntr = 0;
+               int t_cntr = 0;
+               u64 cur_map_ptr = 0;
+               int pg_list_cntr = 0;
+
+               dprintk(TRC_IO, vhba,
+                       "hndl %d: Scatter Gather list used\n",
+                       (int)cmd_pkt->handle);
+
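+               /*
+                * Collapse the scatterlist into a flat list of page-aligned
+                * DMA addresses, map that list through the FMR pool, and hand
+                * the IOP a single virtually-contiguous data segment.
+                */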
+               {
+                       ha->stats.fmr_stats.total_fmr_ios++;
+
+                       cur_seg = scsi_sglist(cmd);
+                       dprintk(TRC_FMR, vhba,
+                               "SG tot_dsds %d. using FMR...\n", tot_dsds);
+
+                       page_list = kmalloc(sizeof(u64) *
+                                           ((request_bufflen /
+                                             PAGE_SIZE) +
+                                            (2 * tot_dsds)), GFP_ATOMIC);
+                       if (!page_list) {
+                               eprintk(vhba, "alloc failed!\n");
+                               sp->error_flag = 1;
+                               return;
+                       }
+
+                       dprintk(TRC_FMR, vhba,
+                               "allocated %d address ptrs for fmr list\n",
+                               (int)((request_bufflen / PAGE_SIZE) +
+                                     (2 * tot_dsds)));
+
+                       mapped_len = 0;
+
+                       for (cntr = 0; cntr < tot_dsds; cntr++) {
+                               if (pg_list_cntr > vhba_max_dsds_in_fmr) {
+                                       eprintk(vhba,
+                                               "%s: page list ptrs exceed %d!\n",
+                                               __func__,
+                                               (int)vhba_max_dsds_in_fmr);
+                                       assert(0);
+                                       dprintk(TRC_FMR, vhba,
+                                               "freeing pg_list\n");
+                                       kfree(page_list);
+                                       page_list = NULL;
+                                       sp->error_flag = 1;
+                                       return;
+                               }
+                               remaining_length =
+                                   ib_sg_dma_len(vhba->xsmp_info.ib_device,
+                                                 cur_seg);
+                               cur_map_ptr =
+                                   ib_sg_dma_address(vhba->xsmp_info.ib_device,
+                                                     cur_seg) & fmr_page_mask;
+                               dprintk(TRC_FMR, vhba,
+                                       "new dsd rem len %d cur_map_ptr %lx\n",
+                                       remaining_length,
+                                       (unsigned long)cur_map_ptr);
+                               if (cntr == 0) {
+                                       u64 dma_addr;
+
+                                       dma_addr =
+                                           ib_sg_dma_address(vhba->xsmp_info.
+                                                             ib_device,
+                                                             cur_seg);
+                                       /*
+                                        * The first segment may start mid-page:
+                                        * record the page-aligned base and the
+                                        * offset into that first page.
+                                        */
+                                       page_list[pg_list_cntr] =
+                                           dma_addr & fmr_page_mask;
+                                       first_pg_offset =
+                                           dma_addr & ~fmr_page_mask;
+                                       remaining_length -=
+                                           PAGE_SIZE - first_pg_offset;
+                                       dprintk(TRC_FMR, vhba,
+                                               "offset %d rem len in dsd %d\n",
+                                               first_pg_offset,
+                                               remaining_length);
+                                       cur_map_ptr = page_list[pg_list_cntr] +
+                                           PAGE_SIZE;
+                                       pg_list_cntr++;
+                               } else if ((cur_map_ptr & ~fmr_page_mask) != 0) {
+                                       /*
+                                        * Middle segments must be page-aligned
+                                        * for FMR; flag the IO so it is failed
+                                        * below.
+                                        */
+                                       dprintk(TRC_FMR, vhba,
+                                               "%s: Non-aligned page address = 0x%lx\n",
+                                               __func__,
+                                               (unsigned long)cur_map_ptr);
+                                       ha->stats.fmr_stats.unaligned_ptr_cnt++;
+                                       unaligned_io = 1;
+                               }
+                               while (remaining_length > 0) {
+                                       dprintk(TRC_FMR, vhba,
+                                               "rem len %d cntr %x cur_map_ptr %lx\n",
+                                               remaining_length, pg_list_cntr,
+                                               (unsigned long)cur_map_ptr);
+                                       page_list[pg_list_cntr] = cur_map_ptr;
+                                       remaining_length =
+                                           remaining_length - PAGE_SIZE;
+                                       cur_map_ptr += PAGE_SIZE;
+                                       pg_list_cntr++;
+                               }
+
+                               if (unaligned_io) {
+                                       ha->stats.fmr_stats.unaligned_io_cnt++;
+                                       dprintk(TRC_FMR, vhba,
+                                               "freeing pg_list\n");
+                                       kfree(page_list);
+                                       page_list = NULL;
+                                       sp->error_flag = 1;
+                                       return;
+                               }
+
+                               dprintk(TRC_FMR, vhba,
+                                       "final rem len %d cntr %d cur_map_ptr %lx\n",
+                                       remaining_length, pg_list_cntr,
+                                       (unsigned long)cur_map_ptr);
+                               mapped_len +=
+                                   (int)ib_sg_dma_len(vhba->xsmp_info.ib_device,
+                                                      cur_seg);
+                               dprintk(TRC_FMR, vhba,
+                                       "hndl %d: mapped len is %u\n",
+                                       (int)cmd_pkt->handle, mapped_len);
+                               SG_NEXT(cur_seg);
+                       }
+
+                       for (t_cntr = 0; t_cntr < pg_list_cntr; t_cntr++) {
+                               dprintk(TRC_FMR, vhba,
+                                       "hndl %d: SG FMR: page_list[%d] = %lx\n",
+                                       (int)cmd_pkt->handle, t_cntr,
+                                       (unsigned long)page_list[t_cntr]);
+                       }
+
+                       mapped_addr = page_list[0];
+                       dprintk(TRC_FMR, vhba,
+                               "calling map buf fmr len %u cmd bufflen %u page_list_cntr %x mapped addr %lx\n",
+                               mapped_len, request_bufflen, pg_list_cntr,
+                               (unsigned long)mapped_addr);
+                       dprintk(TRC_FMR, vhba,
+                               "sp %lx sp_index %lx spfmr pool %lx\n",
+                               (unsigned long)sp, (unsigned long)sp_index,
+                               (unsigned long)sp->pool_fmr[sp_index]);
+                       ret = vhba_map_buf_fmr(vhba, page_list,
+                                              pg_list_cntr, &mapped_addr, sp,
+                                              sp_index);
+                       if (ret == -1) {
+                               dprintk(TRC_FMR_ERRS, vhba,
+                                       "vhba_map_buf_fmr failed\n");
+                               dprintk(TRC_FMR, vhba, "freeing pg_list\n");
+                               kfree(page_list);
+                               page_list = NULL;
+                               sp->error_flag = 1;
+                               return;
+                       }
+
+                       dprintk(TRC_FMR, vhba,
+                               "hndl %d: SG FMR: mapped addr %llx + offset %d\n",
+                               (int)cmd_pkt->handle, mapped_addr,
+                               first_pg_offset);
+                       dprintk(TRC_FMR, vhba,
+                               "hndl %d: SG FMR: len %u rkey 0x%x rindex 0x%x\n",
+                               (int)cmd_pkt->handle, mapped_len,
+                               ((struct ib_pool_fmr *)
+                                sp->pool_fmr[sp_index])->fmr->rkey,
+                               rindex);
+                       mapped_addr = mapped_addr + first_pg_offset;
+                       *cur_dsd++ = cpu_to_le32(LSD(mapped_addr));
+                       *cur_dsd++ = cpu_to_le32(MSD(mapped_addr));
+                       *cur_dsd_len = cpu_to_le32((u32) request_bufflen);
+
+                       dprintk(TRC_FMR, NULL,
+                               "Original SCSI request_bufflen = %d 0x%x\n",
+                               (u32) request_bufflen, (u32) request_bufflen);
+
+                       sp->tot_dsds = 1;
+                       cmd_pkt->dseg_count = cpu_to_le16(sp->tot_dsds);
+                       dprintk(TRC_FMR, vhba, "done with mapping\n");
+
+                       cmd_pkt->rkey1 =
+                           cpu_to_be32(((struct ib_pool_fmr *)
+                                        sp->pool_fmr[sp_index])->fmr->rkey);
+               }
+       } else {
+               dma_addr_t req_dma;
+               unsigned long offset;
+
+               dprintk(TRC_FMR, vhba,
+                       "hndl %d: No Scatter Gather list used\n",
+                       (int)cmd_pkt->handle);
+               offset = ((unsigned long)scsi_sglist(cmd) & ~PAGE_MASK);
+               req_dma = ib_dma_map_single(vhba->xsmp_info.ib_device,
+                                           (void *)scsi_sglist(cmd),
+                                           request_bufflen,
+                                           cmd->sc_data_direction);
+               sp->dma_handle = req_dma;
+
+               if (req_dma & 0x7) {
+                       dprintk(TRC_ERRORS, vhba,
+                               "data buff address not 8 byte aligned!\n");
+                       sp->error_flag = 1;
+                       ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+                                           sp->dma_handle, request_bufflen,
+                                           cmd->sc_data_direction);
+                       return;
+               }
+
+               {
+                       int i;
+                       int num_pages;
+
+                       req_dma = req_dma & fmr_page_mask;
+                       offset = sp->dma_handle - req_dma;
+                       sp_index = 0;
+
+                       /* Get the number of pages */
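+                       /* (round up for a partial tail page; the start offset
+                        *  can push the buffer across one more boundary) */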
+                       num_pages = (unsigned long)
+                           request_bufflen / PAGE_SIZE;
+                       if (request_bufflen % PAGE_SIZE)
+                               num_pages += 1;
+
+                       if ((offset + (request_bufflen % PAGE_SIZE)) >
+                           PAGE_SIZE)
+                               num_pages += 1;
+
+                       page_list = kmalloc(sizeof(u64) *
+                                           num_pages, GFP_ATOMIC);
+                       if (!page_list) {
+                               eprintk(vhba, "Page alloc failed!\n");
+                               ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+                                                   sp->dma_handle,
+                                                   request_bufflen,
+                                                   cmd->sc_data_direction);
+                               sp->error_flag = 1;
+                               return;
+                       }
+
+                       for (i = 0; i < num_pages; i++) {
+                               page_list[i] = sp->dma_handle + (PAGE_SIZE * i);
+                               page_list[i] &= fmr_page_mask;
+                       }
+                       mapped_addr = page_list[0];
+
+                       ret = vhba_map_buf_fmr(vhba, page_list, num_pages,
+                                              &mapped_addr, sp, sp_index);
+
+                       if (ret == -1) {
+                               dprintk(TRC_ERRORS, vhba,
+                                       "vhba_map_buf_fmr failed\n");
+                               kfree(page_list);
+                               page_list = NULL;
+                               sp->error_flag = 1;
+                               return;
+                       }
+
+                       dprintk(TRC_FMR, vhba,
+                               "no sg: hndl %d: NSG FMR: req_dma %llx mapped addr %llx + offset %lu\n",
+                               (int)cmd_pkt->handle,
+                               (unsigned long long)req_dma,
+                               mapped_addr, offset);
+                       mapped_addr += offset;
+                       rkey[rindex] =
+                           cpu_to_be32(((struct ib_pool_fmr *)
+                                        sp->pool_fmr[sp_index])->fmr->rkey);
+                       *cur_dsd++ = cpu_to_le32(LSD(mapped_addr));
+                       *cur_dsd++ = cpu_to_le32(MSD(mapped_addr));
+                       *cur_dsd_len = cpu_to_le32((u32) request_bufflen);
+
+                       dprintk(TRC_FMR, NULL,
+                               "Original SCSI request_bufflen = %d 0x%x\n",
+                               (u32) request_bufflen, (u32) request_bufflen);
+
+                       dprintk(TRC_FMR, vhba,
+                               "no sg: hndl %d: NSG FMR: mapped addr 0x%llx len 0x%x rkey 0x%x rindex 0x%x\n",
+                               (int)cmd_pkt->handle, mapped_addr,
+                               request_bufflen,
+                               ((struct ib_pool_fmr *)
+                                sp->pool_fmr[sp_index])->fmr->rkey,
+                               rindex);
+
+               }
+       }
+
+       kfree(page_list);
+       page_list = NULL;
+}
+
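+/*
+ * Fill fixed-format (response code 0x70) sense data: byte 2 carries the
+ * sense key, byte 12 the ASC and byte 13 the ASCQ.
+ */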
+static void sense_buffer(struct scsi_cmnd *cmd, int key, int asc, int asq)
+{
+       u8 *sbuff;
+
+       sbuff = cmd->sense_buffer;
+       memset(sbuff, 0, SCSI_SENSE_BUFFERSIZE);
+       sbuff[0] = 0x70;        /* fixed, current */
+       sbuff[2] = key;
+       sbuff[7] = 0xa;         /* implies 18 byte sense buffer */
+       sbuff[12] = asc;
+       sbuff[13] = asq;
+
+       dprintk(TRC_SCSI_ERRS, NULL, "[sense_key,asc,ascq]: [0x%x,0x%x,0x%x]\n",
+               key, asc, asq);
+}
+
+int vhba_report_luns_cmd(struct srb *sp, u32 t, u32 l)
+{
+       struct scatterlist *sg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       struct scsi_cmnd *cmd;
+       struct xg_scsi_lun *lun;
+       struct os_tgt *tq;
+       unsigned long flags = 0;
+       int ret = 0;
+       int i;
+       u16 lun_cnt;
+       int lun_byte;
+       int rsp_byte;
+       int total_size;
+       int copy_len;
+       char *buf;
+       char *data_ptr;
+       u8 *cdb;
+       int alloc_len;
+       int req_len;
+       int act_len;
+       u32 request_bufflen = scsi_bufflen(sp->cmd);
+
+       cmd = sp->cmd;
+       ha = sp->ha;
+       cdb = cmd->cmnd;
+       vhba = ha->vhba;
+
+       dprintk(TRC_FUNCS, vhba, "Entering...\n");
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+
+       /* Check allocation length and select report */
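+       /* (REPORT LUNS: CDB bytes 6..9 hold the allocation length,
+        *  big-endian; byte 2 is SELECT REPORT) */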
+       alloc_len = cdb[9] + (cdb[8] << 8) + (cdb[7] << 16) + (cdb[6] << 24);
+       if ((alloc_len < 16) || (cdb[2] > 2)) {
+               sense_buffer(cmd, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+               cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+               ret = 1;
+               goto err;
+       }
+
+       /* Check reserved bit */
+       if (cdb[1] || cdb[3] || cdb[4] || cdb[5] || cdb[10]) {
+               sense_buffer(cmd, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
+               cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION;
+               ret = 1;
+               goto err;
+       }
+
+       tq = TGT_Q(ha, t);
+       if (!tq) {
+               eprintk(vhba, "null tq context for tgt %u\n", t);
+               cmd->result = DID_NO_CONNECT << 16;
+               ret = 1;
+               goto err;
+       }
+       lun_cnt = tq->fcport->lun_count;
+       lun_byte = lun_cnt * sizeof(struct xg_scsi_lun);
+       rsp_byte = (lun_cnt + 1) * sizeof(struct xg_scsi_lun);
+
+       /* Calculate actual length */
+       req_len = request_bufflen;
+       scsi_set_resid(cmd, 0);
+       if (alloc_len < req_len) {
+               act_len = alloc_len;
+               scsi_set_resid(cmd, req_len - alloc_len);
+       } else {
+               act_len = req_len;
+               scsi_set_resid(cmd, alloc_len - req_len);
+       }
+       dprintk(TRC_SCSI, vhba, "req_len=%d, alloc_len=%d, act_len=%d, ",
+               req_len, alloc_len, act_len);
+
+       if (rsp_byte > act_len) {
+               rsp_byte = act_len;
+               lun_cnt = act_len / sizeof(struct xg_scsi_lun);
+               if (lun_cnt > 0)
+                       lun_cnt--;
+               else
+                       lun_cnt = 0;
+               dprintk(TRC_SCSI, vhba,
+                       "Truncate response buffer, lun_cnt=%d\n", lun_cnt);
+       }
+       dprintk(TRC_SCSI, vhba, "Total number of luns active = %d\n", lun_cnt);
+
+       lun = kzalloc(rsp_byte, GFP_ATOMIC);
+       if (!lun) {
+               dprintk(TRC_SCSI, vhba, "Failed to allocate memory\n");
+               cmd->result = DID_ERROR << 16;
+               ret = 1;
+               goto err;
+       }
+
+       /* Create the header. */
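+       /* First 4 bytes: LUN list length in bytes (lun_cnt * 8), big-endian */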
+       lun[0].scsi_lun[0] = (lun_byte >> 24) & 0xff;
+       lun[0].scsi_lun[1] = (lun_byte >> 16) & 0xff;
+       lun[0].scsi_lun[2] = (lun_byte >> 8) & 0xff;
+       lun[0].scsi_lun[3] = (lun_byte >> 0) & 0xff;
+
+       /* Create data */
+       for (i = 1; i <= lun_cnt; i++) {
+               lun[i].scsi_lun[0] = ((tq->fcport->lun_ids[i - 1] >> 8) & 0xff);
+               lun[i].scsi_lun[1] = (tq->fcport->lun_ids[i - 1] & 0xff);
+               lun[i].scsi_lun[2] = 0;
+               lun[i].scsi_lun[3] = 0;
+               lun[i].scsi_lun[4] = 0;
+               lun[i].scsi_lun[5] = 0;
+               lun[i].scsi_lun[6] = 0;
+               lun[i].scsi_lun[7] = 0;
+       }
+
+       /* Data copy */
+       if (scsi_sg_count(cmd)) {
+               data_ptr = (u8 *) &(lun[0]);
+               total_size = rsp_byte;
+               sg = scsi_sglist(cmd);
+               dprintk(TRC_SCSI, vhba, "S/G list, num_sg=%d, buf_len=%d\n",
+                       scsi_sg_count(cmd), request_bufflen);
+               dprintk(TRC_SCSI, vhba, "total response size = 0x%x\n",
+                       total_size);
+
+               while (total_size > 0) {
+                       unsigned int sg_offset = SG_OFFSET(sg);
+                       unsigned int sg_length = SG_LENGTH(sg);
+
+                       if (total_size > (sg_length - sg_offset))
+                               copy_len = sg_length - sg_offset;
+                       else
+                               copy_len = total_size;
+
+                       dprintk(TRC_SCSI, vhba,
+                               "sg_len=0x%x, sg_offset=0x%x, ",
+                               sg_length, sg_offset);
+                       dprintk(TRC_SCSI, vhba, "copy_len=0x%x\n",
+                               copy_len);
+
+                       buf = page_address(sg_page(sg));
+                       if (!buf) {
+                               ret = 1;
+                               goto err_2;
+                       }
+                       buf += sg_offset;
+                       memcpy(buf, data_ptr, copy_len);
+
+                       total_size -= copy_len;
+                       if (total_size > 0) {
+                               dprintk(TRC_SCSI, vhba,
+                                       "More data 0x%x\n", total_size);
+                               data_ptr += copy_len;
+                               SG_NEXT(sg);
+                       }
+               }
+               SG_RESET(sg);
+       } else if (request_bufflen) {
+               dprintk(TRC_SCSI, vhba, "Single buffer size=0x%x\n",
+                       request_bufflen);
+               memcpy(scsi_sglist(cmd), (void *)lun, rsp_byte);
+       }
+       cmd->result = DID_OK << 16;
+err_2:
+       kfree(lun);
+err:
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+
+       return ret;
+}
+
+int vhba_start_scsi(struct srb *sp, u32 tgt, u32 lun, u32 handle)
+{
+       struct cmd_type_7 *cmd_pkt;
+       struct scsi_xg_vhba_host *ha = sp->ha;
+       struct virtual_hba *vhba = ha->vhba;
+       struct scsi_cmnd *cmd = sp->cmd;
+       struct os_tgt *tq;
+       struct scatterlist *sg;
+       int tot_dsds;
+       int req_cnt, i;
+       u16 lcl_timeout;
+       u32 request_bufflen = scsi_bufflen(cmd);
+
+       dprintk(TRC_FUNCS, NULL, "Entering...\n");
+
+       sp->unaligned_sg = NULL;
+       sp->bounce_buffer = NULL;
+       if (scsi_sg_count(cmd) && (sp->cmd->sc_data_direction != DMA_NONE)) {
+               if (check_sg_alignment(sp, scsi_sglist(cmd))) {
+                       sp->unaligned_sg = vhba_setup_bounce_buffer(sp);
+                       if (!sp->unaligned_sg) {
+                               pr_err("Error: unable to setup bounce buffer\n");
+                               sp->error_flag = 1;
+                               return 1;
+                       }
+                       ha->stats.fmr_stats.unaligned_io_cnt++;
+               }
+       }
+
+       /*
+        * Enqueue srb in the outstanding commands
+        * Check if marker is needed
+        */
+       tot_dsds = 0;
+       sg = NULL;
+
+       if (scsi_sg_count(cmd)) {
+               sg = scsi_sglist(cmd);
+               tot_dsds = ib_dma_map_sg(vhba->xsmp_info.ib_device,
+                                        sg, scsi_sg_count(cmd),
+                                        cmd->sc_data_direction);
+       } else if (request_bufflen)
+               tot_dsds++;
+
+       req_cnt = 1;
+
+       if (req_cnt > MAX_IOCBS_IN_VH) {
+               eprintk(vhba,
+                       "Total IOCBS %d > max val %d with total dsds %d\n",
+                       req_cnt, MAX_IOCBS_IN_VH, tot_dsds);
+               goto queuing_error;
+       }
+
+       if (tot_dsds > vhba_max_dsds_in_fmr) {
+               eprintk(vhba, "Total DSDs %d > %d\n",
+                       tot_dsds, (int)vhba_max_dsds_in_fmr);
+               goto queuing_error;
+       }
+
+       if (((ha->req_ring_windex + 1) % 1024) == *ha->req_ring_rindex) {
+               dprintk(TRC_IO, NULL, "Queue full\n");
+               goto queuing_error;
+       }
+
+       /* Make sure there is place for all IOCBS in the ring... */
+       cmd_pkt = (struct cmd_type_7 *)ha->request_ring_ptr;
+
+       memset(cmd_pkt, 0, sizeof(struct cmd_type_7));
+
+       cmd_pkt->handle = handle;
+       sp->iocb_handle = handle;
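+       /* With multiple queues enabled, the queue number is encoded in the
+        * upper 16 bits of the IOCB handle. */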
+       if (vhba_multiple_q)
+               cmd_pkt->handle = cmd_pkt->handle | (sp->queue_num << 16);
+
+       sp->cmd->host_scribble =
+           (unsigned char *)(unsigned long)cmd_pkt->handle;
+       ha->req_q_cnt -= req_cnt;
+
+       tq = TGT_Q(ha, tgt);
+       cmd_pkt->nport_handle = cpu_to_le16(tq->fcport->loop_id);
+       dprintk(TRC_IO, vhba, "NPORT hndl is 0x%x\n", cmd_pkt->nport_handle);
+
+       cmd_pkt->port_id[0] = tq->d_id.b.al_pa;
+       dprintk(TRC_IO, vhba, "PORT ID byte 0 is 0x%x\n", cmd_pkt->port_id[0]);
+       cmd_pkt->port_id[1] = tq->d_id.b.area;
+       dprintk(TRC_IO, vhba, "PORT ID byte 1 is 0x%x\n", cmd_pkt->port_id[1]);
+       cmd_pkt->port_id[2] = tq->d_id.b.domain;
+       dprintk(TRC_IO, vhba, "PORT ID byte 2 is 0x%x\n", cmd_pkt->port_id[2]);
+
+       cmd_pkt->dseg_count = cpu_to_le16(tot_dsds);
+
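+       /* Build the 8-byte FCP LUN and byte-swap it to wire order */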
+       cmd_pkt->lun[1] = LSB(lun);
+       cmd_pkt->lun[2] = MSB(lun);
+       host_to_fcp_swap(cmd_pkt->lun, sizeof(cmd_pkt->lun));
+
+       dprintk(TRC_IO, vhba, "hndl %d: cdb buffer dump:\n",
+               (int)cmd_pkt->handle);
+       if (vhba_debug == TRC_IO) {
+               for (i = 0; i < cmd->cmd_len; i++)
+                       dprintk(TRC_IO, vhba, "%x ", cmd->cmnd[i]);
+               dprintk(TRC_IO, vhba, "\n");
+       }
+
+       memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len);
+       host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb));
+
+       /*
+        * timeout_per_command(cmd) is the timeout value
+        * for the cmd from SCSI and is in milliseconds
+        * so divide by 1000 to get in secs
+        */
+
+       if ((timeout_per_command(cmd) / 1000) > 5)
+               lcl_timeout =
+                   (u16) (((timeout_per_command(cmd) / 1000) * 8) / 10);
+       else if ((timeout_per_command(cmd) / 1000) >= 2)
+               lcl_timeout = (u16) ((timeout_per_command(cmd) / 1000) - 1);
+       else if ((timeout_per_command(cmd) / 1000) == 1)
+               lcl_timeout = 1;
+       else
+               lcl_timeout = VHBA_CMD_TIMEOUT;
+
+       cmd_pkt->timeout = cpu_to_le16(lcl_timeout);
+       dprintk(TRC_IO, vhba, "sp = %p, scsi_pkt_timeout = %d\n",
+               sp, timeout_per_command(cmd));
+       cmd_pkt->byte_count = cpu_to_le32((u32) request_bufflen);
+       dprintk(TRC_IO, vhba, "hndl %d: byte cnt 0x%x, lcl_timeout: 0x%x\n",
+               (int)cmd_pkt->handle, cmd_pkt->byte_count, lcl_timeout);
+
+       vhba_build_scsi_iocbs(sp, cmd_pkt, tot_dsds);
+
+       if (sp->error_flag) {
+               if (scsi_sg_count(cmd))
+                       ib_dma_unmap_sg(vhba->xsmp_info.ib_device,
+                                       sg, scsi_sg_count(cmd),
+                                       cmd->sc_data_direction);
+               return 1;
+       }
+
+       cmd_pkt->vp_index = ha->vp_index;
+
+       if (cmd_pkt->byte_count != cpu_to_le32((u32) request_bufflen))
+               dprintk(TRC_IO, vhba,
+                       "hndl %d: byte cnt %d != req buff len %d\n",
+                       (int)cmd_pkt->handle, cmd_pkt->byte_count,
+                       cpu_to_le32((u32) request_bufflen));
+
+       if (req_cnt != 1)
+               dprintk(TRC_IO, vhba, "curr entry cnt is %d\n", req_cnt);
+       cmd_pkt->entry_count = 1;
+
+       sp->flags |= SRB_DMA_VALID;
+
+       /* Adjust ring index  and send a write index update... */
+       ha->req_ring_windex++;
+       if (ha->req_ring_windex == REQUEST_ENTRY_CNT_24XX) {
+               ha->req_ring_windex = 0;
+               ha->request_ring_ptr = ha->request_ring;
+       } else
+               ha->request_ring_ptr++;
+
+       if (vhba_send_write_index(vhba)) {
+               dprintk(TRC_ERRORS, vhba, "send write index failed\n");
+               sp->flags &= ~SRB_DMA_VALID;
+               if (scsi_sg_count(sp->cmd)) {
+                       ib_dma_unmap_sg(vhba->xsmp_info.ib_device,
+                                       scsi_sglist(sp->cmd),
+                                       scsi_sg_count(sp->cmd),
+                                       sp->cmd->sc_data_direction);
+               } else if (request_bufflen) {
+                       ib_dma_unmap_single(vhba->xsmp_info.ib_device,
+                                           sp->dma_handle,
+                                           request_bufflen,
+                                           sp->cmd->sc_data_direction);
+               }
+               vhba_unmap_buf_fmr(vhba, sp, sp->tot_dsds);
+
+               return 1;
+       }
+
+       return 0;
+
+queuing_error:
+       if (scsi_sg_count(cmd))
+               ib_dma_unmap_sg(vhba->xsmp_info.ib_device, sg,
+                               scsi_sg_count(cmd), cmd->sc_data_direction);
+       dprintk(TRC_SCSI_ERRS, vhba,
+               "Cannot queue req as IOCB to ring (err2)\n");
+       return 1;
+}
+
+int vhba_send_abort(struct virtual_hba *vhba, int abort_handle, int t)
+{
+       struct vhba_abort_cmd *abort_msg = NULL;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq = NULL;
+       int ret = 0;
+
+       tq = TGT_Q(ha, t);
+
+       if (!tq) {
+               eprintk(vhba, "null tq context in vhba_send_abort\n");
+               return 2;
+       }
+
+       abort_msg = kmalloc(sizeof(struct vhba_abort_cmd), GFP_ATOMIC);
+       if (!abort_msg) {
+               eprintk(vhba, "kmalloc failed for send xsmp abort\n");
+               return 1;
+       }
+
+       abort_msg->type = ABORT_CMD;
+       abort_msg->handle_to_abort = abort_handle;
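+       /* 24-bit FC destination ID, stored as al_pa / area / domain */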
+       abort_msg->port_id[0] = tq->d_id.b.al_pa;
+       abort_msg->port_id[1] = tq->d_id.b.area;
+       abort_msg->port_id[2] = tq->d_id.b.domain;
+
+       dprintk(TRC_INFO, vhba,
+               "sending abort msg for handle %x p0 %x p1 %x p2 %x\n",
+               abort_handle, abort_msg->port_id[0],
+               abort_msg->port_id[1], abort_msg->port_id[2]);
+
+       /* check qp status */
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+                                      (u8 *) abort_msg,
+                                      sizeof(struct vhba_abort_cmd),
+                                      XSCORE_DEFER_PROCESS);
+       } else {
+               dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+               kfree(abort_msg);
+               return VHBA_QP_DISCONNECTED;
+       }
+
+       if (ret) {
+               ha->stats.ib_stats.cqp_send_err_cnt++;
+               eprintk(vhba, "xsigo ib send msg failed [%d]\n", ret);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               kfree(abort_msg);
+               return 1;
+       }
+
+       return 0;
+}
+
+int vhba_send_tgt_reset(struct virtual_hba *vhba, int t)
+{
+       struct vhba_tgt_reset_msg *reset_msg = NULL;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       int ret = 0;
+       int i;
+
+       tq = TGT_Q(ha, t);
+
+       /*
+        * TODO: there should be a mechanism to check whether the otgt
+        * array has been fully populated. This is a simple check in the
+        * meantime.
+        */
+       if (!tq) {
+               pr_err("null tq context in vhba_send_tgt_reset\n");
+               return 2;
+       }
+
+       reset_msg = kmalloc(sizeof(struct vhba_tgt_reset_msg), GFP_ATOMIC);
+       if (!reset_msg) {
+               eprintk(NULL, "kmalloc failed for send xsmp tgt reset\n");
+               return 1;
+       }
+
+       reset_msg->type = TGT_RESET;
+       reset_msg->vhba_id = ha->vp_index;
+
+       for (i = 0; i < WWN_SIZE; i++)
+               reset_msg->wwpn[i] = tq->port_name[i];
+
+       /* check qp status */
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               dprintk(TRC_INFO, vhba,
+                       "sending tgt reset msg for vhba %p\n", vhba);
+               ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+                                      (u8 *) reset_msg,
+                                      sizeof(struct vhba_tgt_reset_msg),
+                                      XSCORE_DEFER_PROCESS);
+       } else {
+               dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+               kfree(reset_msg);
+               return VHBA_QP_DISCONNECTED;
+       }
+
+       if (ret) {
+               ha->stats.ib_stats.cqp_send_err_cnt++;
+               eprintk(vhba, "xscore_post_send() failed [%d]\n", ret);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               kfree(reset_msg);
+               return 1;
+       }
+
+       return 0;
+}
+
+int vhba_send_lun_reset(struct virtual_hba *vhba, int t, int l)
+{
+       struct vhba_lun_reset_msg *reset_msg = NULL;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       struct os_lun *lq;
+       int ret = 0;
+       int i;
+
+       tq = TGT_Q(ha, t);
+
+       /*
+        * TODO: there should be a mechanism to check whether the otgt
+        * array has been fully populated. This is a simple check in the
+        * meantime.
+        */
+       if (!tq) {
+               pr_err("null tq context in vhba_send_lun_reset\n");
+               return 2;
+       }
+
+       lq = LUN_Q(ha, t, l);
+       if (!lq) {
+               pr_err("null lq context in vhba_send_lun_reset\n");
+               return 3;
+       }
+
+       reset_msg = kmalloc(sizeof(struct vhba_lun_reset_msg), GFP_ATOMIC);
+       if (!reset_msg) {
+               eprintk(NULL, "kmalloc failed for send xsmp lun reset\n");
+               return 1;
+       }
+
+       reset_msg->type = LUN_RESET;
+       reset_msg->vhba_id = ha->vp_index;
+       reset_msg->lun = (u16) l;
+
+       for (i = 0; i < WWN_SIZE; i++)
+               reset_msg->wwpn[i] = tq->port_name[i];
+
+       /* check qp status */
+       if (atomic_read(&ha->qp_status) == VHBA_QP_CONNECTED) {
+               dprintk(TRC_INFO, vhba,
+                       "sending lun reset msg for vhba %p\n", vhba);
+               ret = xscore_post_send(&vhba->ctrl_conn.ctx,
+                                      (u8 *) reset_msg,
+                                      sizeof(struct vhba_lun_reset_msg),
+                                      XSCORE_DEFER_PROCESS);
+       } else {
+               dprintk(TRC_INFO, vhba, "qp already in disconn state\n");
+               kfree(reset_msg);
+               return VHBA_QP_DISCONNECTED;
+       }
+
+       if (ret) {
+               ha->stats.ib_stats.cqp_send_err_cnt++;
+               eprintk(vhba, "xscore_post_send() failed [%d]\n", ret);
+               ib_link_down(ha);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+               kfree(reset_msg);
+               return 1;
+       }
+       return 0;
+}
+
+struct os_lun *vhba_allocate_lun(struct virtual_hba *vhba, u32 tgt, u32 lun)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_lun *lq;
+       u32 max_lun;
+
+       if (vhba->cfg->lunmask_enable)
+               max_lun = MAX_FIBRE_LUNS;
+       else
+               max_lun = MAX_FIBRE_LUNS_MORE;
+
+       /* If SCSI addressing OK, allocate LUN queue. */
+       if (tgt >= MAX_TARGETS || lun >= max_lun) {
+               eprintk(vhba,
+                       "scsi(%ld): Unable to allocate lun, invalid parameters %u %u. Returning null\n",
+                       ha->host_no, tgt, lun);
+               return NULL;
+       }
+
+       if (TGT_Q(ha, tgt) == NULL) {
+               eprintk(vhba, "Tgt %d not found in tgt_q\n", tgt);
+               return NULL;
+       }
+
+       lq = LUN_Q(ha, tgt, lun);
+       if (lq == NULL) {
+               lq = kzalloc(sizeof(struct os_lun), GFP_ATOMIC);
+               if (lq != NULL) {
+                       dprintk(TRC_IO, vhba,
+                               "scsi(%ld): Alloc Lun %d @ tgt %d\n",
+                               ha->host_no, lun, tgt);
+                       LUN_Q(ha, tgt, lun) = lq;
+               }
+       }
+
+       if (lq == NULL) {
+               eprintk(vhba, "Unable to allocate lun\n");
+               return NULL;
+       }
+
+       return lq;
+}
+
+static struct os_tgt *vhba_tgt_alloc(struct virtual_hba *vhba, u32 tgt)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+
+       /* If SCSI addressing OK, allocate TGT queue and lock. */
+       if (tgt >= MAX_TARGETS) {
+               eprintk(vhba,
+                       "scsi(%ld): Unable to allocate target, invalid target number %u. Returning null\n",
+                       ha->host_no, tgt);
+               return NULL;
+       }
+
+       tq = TGT_Q(ha, tgt);
+       if (tq == NULL) {
+               tq = kzalloc(sizeof(struct os_tgt), GFP_ATOMIC);
+               if (tq != NULL) {
+                       dprintk(TRC_IO, vhba,
+                               "scsi(%ld): Alloc Target %d @ %p\n",
+                               ha->host_no, tgt, tq);
+                       tq->ha = ha;
+                       tq->init_done = 0;
+                       TGT_Q(ha, tgt) = tq;
+                       tq->state = VHBA_IO_STATE_ACTIVE;
+               }
+       }
+
+       if (tq != NULL) {
+               if (tq == TGT_Q(ha, tgt))
+                       dprintk(TRC_IO, vhba, "tq is same as TGT_Q\n");
+               else
+                       dprintk(TRC_IO, vhba, "tq is not same as TGT_Q\n");
+       } else
+               eprintk(vhba, "Unable to allocate target\n");
+               eprintk(vhba, "Unable to allocate target\n");
+
+       return tq;
+}
+
+static u32
+vhba_target_bind(struct virtual_hba *vhba, u32 loop_id, u8 *node_name,
+                u8 *port_name, u32 port_id, s32 bound_value,
+                u32 lun_count, u8 *lun_map, u16 *lun_id, u8 media_type)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       struct fc_port *fcport;
+       struct fc_port *fcporttemp;
+       unsigned long flags;
+       u32 tgt;
+       int port_found;
+       int k, id;
+
+       port_found = 0;
+       spin_lock_irqsave(&ha->list_lock, flags);
+
+       list_for_each_entry_safe(fcport, fcporttemp, &ha->disc_ports, list) {
+               if (memcmp(port_name, fcport->port_name, WWN_SIZE) == 0) {
+                       port_found = 1;
+                       break;
+               }
+       }
+
+       spin_unlock_irqrestore(&ha->list_lock, flags);
+
+       if (port_found) {
+               /*
+                * Port must be already bound at a particular location
+                * Just set the state and flags
+                */
+               dprintk(TRC_IO, NULL,
+                       "port already exists, so just updating info\n");
+               fcport->d_id.b24 = port_id;
+               fcport->loop_id = loop_id;
+               fcport->lun_count = lun_count;
+               if (fcport->tgt_queue) {
+                       fcport->tgt_queue->d_id.b24 = fcport->d_id.b24;
+                       set_bit(TQF_ONLINE, &fcport->tgt_queue->flags);
+               }
+               if (media_type == TYPE_TAPE)
+                       fcport->flags |= FCF_TAPE_PRESENT;
+               else
+                       fcport->flags &= ~FCF_TAPE_PRESENT;
+               atomic_set(&fcport->state, FCS_ONLINE);
+               return 0;
+       }
+
+       fcport = kzalloc(sizeof(struct fc_port), GFP_ATOMIC);
+       if (!fcport) {
+               eprintk(vhba, "Couldn't allocate fcport\n");
+               return 1;
+       }
+       fcport->loop_id = loop_id;
+       fcport->lun_count = lun_count;
+       fcport->supported_classes |= FC_COS_CLASS3;
+
+       for (k = 0; k < lun_count; k++) {
+               if (lun_id) {
+                       dprintk(TRC_IO, vhba,
+                               "Adding lun id %d to list\n", lun_id[k]);
+                       fcport->lun_ids[k] = lun_id[k];
+               } else {
+                       dprintk(TRC_IO, vhba,
+                               "Setting lun id [%d] to 0 in list\n", k);
+                       fcport->lun_ids[k] = 0;
+               }
+       }
+
+       id = fcport->loop_id;
+       dprintk(TRC_IO, vhba, "fcport loop id:%d\n", id);
+       fcport->d_id.b24 = port_id;
+
+       memcpy(fcport->port_name, port_name, WWN_SIZE);
+       memcpy(fcport->node_name, node_name, WWN_SIZE);
+       fcport->persistent_binding = bound_value;
+
+       add_to_disc_ports(fcport, vhba);
+
+       /*
+        * Check for persistent binding.
+        * If the bound value is not -1, check for a valid slot:
+        * it must be >= 0 and < MAX_TARGETS.
+        */
+       tgt = (u32) bound_value;
+       if (bound_value != -1) {
+               tq = TGT_Q(ha, tgt);
+               if (tq == NULL) {
+                       tq = vhba_tgt_alloc(vhba, tgt);
+                       if (tq != NULL) {
+                               memcpy(tq->node_name, fcport->node_name,
+                                      WWN_SIZE);
+                               memcpy(tq->port_name, fcport->port_name,
+                                      WWN_SIZE);
+                               tq->d_id.b24 = fcport->d_id.b24;
+                               fcport->bound = 1;
+                               fcport->os_target_id = tgt;
+                               fcport->tgt_queue = tq;
+                               tq->fcport = fcport;
+                               if (media_type == TYPE_TAPE)
+                                       fcport->flags |= FCF_TAPE_PRESENT;
+                               else
+                                       fcport->flags &= ~FCF_TAPE_PRESENT;
+                               set_bit(TQF_ONLINE, &tq->flags);
+                               atomic_set(&fcport->state, FCS_ONLINE);
+                               if (ha->max_tgt_id < tgt)
+                                       ha->max_tgt_id = tgt;
+                       } else {
+                               id = fcport->loop_id;
+                               fcport->bound = 0;
+                               eprintk(vhba,
+                                       "Unable to allocate tgt for fc_port %d\n",
+                                       id);
+                               return 1;
+                       }
+               } else {
+                       id = fcport->loop_id;
+                       eprintk(vhba,
+                               "Cannot bind fc_port %d at tgt %d\n",
+                               id, tgt);
+                       /*
+                        * Make the port unbound; it will be
+                        * added to the map later.
+                        */
+                       fcport->bound = 0;
+                       return 1;
+               }
+       } else {
+               /* Make the port unbound; it will be added to the map later */
+               fcport->bound = 0;
+               if (media_type == TYPE_TAPE)
+                       fcport->flags |= FCF_TAPE_PRESENT;
+               else
+                       fcport->flags &= ~FCF_TAPE_PRESENT;
+       }
+       return 0;
+}
+
+void vhba_set_tgt_count(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       int tgt;
+
+       ha->target_count = 0;
+       ha->max_tgt_id = 0;
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (tq != NULL) {
+                       if (atomic_read(&tq->fcport->state) == FCS_ONLINE) {
+                               dprintk(TRC_INFO, vhba,
+                                       "tgt[%d]: nport_id: 0x%x\n",
+                                       tgt, tq->d_id.b24);
+                               ha->target_count++;
+                               if (ha->max_tgt_id < tgt)
+                                       ha->max_tgt_id = tgt;
+                       }
+               }
+       }
+
+       if (ha->target_count > 0)
+               ha->max_tgt_id++;
+
+       if (ha->max_tgt_id < ha->target_count)
+               ha->max_tgt_id = ha->target_count;
+
+       ha->max_targets = ha->max_tgt_id;
+       dprintk(TRC_INFO, vhba,
+               "RSCN: max id = %d max targets = %d tgt count = %d\n",
+               ha->max_tgt_id, ha->max_targets, ha->target_count);
+}
+
+static u32 vhba_map_unbound_targets(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       struct fc_port *fcport;
+       struct fc_port *fcporttemp;
+       u32 tgt;
+       int id;
+       int free_tgt_found = 0;
+
+       list_for_each_entry_safe(fcport, fcporttemp, &ha->disc_ports, list) {
+               if (fcport->bound)
+                       continue;
+               tgt = ha->max_tgt_id;
+               while (free_tgt_found == 0) {
+                       tq = TGT_Q(ha, tgt);
+                       if (tq == NULL) {
+                               free_tgt_found = 1;
+                               break;
+                       }
+                       tgt++;
+                       if (tgt == ha->max_tgt_id)
+                               break;
+                       if (tgt >= MAX_TARGETS)
+                               tgt = 0;
+               }
+               if (free_tgt_found == 0) {
+                       dprintk(TRC_SCSI_ERRS, vhba, "Tgt map is full\n");
+                       return 1;
+               }
+               free_tgt_found = 0;
+               tq = vhba_tgt_alloc(vhba, tgt);
+               if (tq != NULL) {
+                       memcpy(tq->node_name, fcport->node_name, WWN_SIZE);
+                       memcpy(tq->port_name, fcport->port_name, WWN_SIZE);
+                       tq->d_id.b24 = fcport->d_id.b24;
+                       fcport->bound = 1;
+                       fcport->os_target_id = tgt;
+                       fcport->tgt_queue = tq;
+                       tq->fcport = fcport;
+                       set_bit(TQF_ONLINE, &tq->flags);
+                       atomic_set(&fcport->state, FCS_ONLINE);
+               } else {
+                       id = fcport->loop_id;
+                       eprintk(vhba, "alloc failed for fc_port %x\n", id);
+                       return 1;
+               }
+       }
+       vhba_set_tgt_count(vhba);
+       return 0;
+}
+
+void vhba_mark_tgts_lost(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       struct os_tgt *tq;
+       u16 tgt;
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (tq == NULL)
+                       continue;
+               set_bit(TQF_SUSPENDED, &tq->flags);
+               atomic_set(&tq->fcport->state, FCS_DEVICE_LOST);
+       }
+}
+
+void ib_link_dead_poll(struct scsi_xg_vhba_host *ha)
+{
+       struct virtual_hba *vhba = ha->vhba;
+
+       if (atomic_read(&ha->ib_status) != VHBA_IB_DOWN)
+               return;
+
+       if (atomic_read(&ha->ib_link_down_cnt)) {
+               if (!atomic_dec_and_test(&ha->ib_link_down_cnt))
+                       return;
+       } else {
+               return;
+       }
+
+       atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+       dprintk(TRC_INFO, vhba, "Marking IB link dead\n");
+}
+
+void ib_link_down(struct scsi_xg_vhba_host *ha)
+{
+       struct virtual_hba *vhba = ha->vhba;
+       struct vhba_xsmp_msg *msg;
+       u32 ib_timeout;
+
+       if (atomic_read(&ha->ib_status) != VHBA_IB_UP)
+               return;
+
+       msg = (struct vhba_xsmp_msg *)vhba->cfg;
+
+       ib_timeout = msg->linkdowntimeout;
+
+       if (ib_timeout > 60)
+               ib_timeout = 60;
+       dprintk(TRC_INFO, vhba, "IB down, timer=%d\n", ib_timeout);
+
+       if (ib_timeout < 5) {
+               atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+       } else {
+               atomic_set(&ha->ib_status, VHBA_IB_DOWN);
+               atomic_set(&ha->ib_link_down_cnt,
+                          ib_timeout / WQ_PERIODIC_TIMER);
+       }
+}
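
For illustration only (not part of this patch): a small user-space sketch of how the clamped link-down timeout above becomes countdown ticks for ib_link_dead_poll(), assuming the 5-second WQ_PERIODIC_TIMER defined in vhba_os_def.h below.

	#include <assert.h>

	#define WQ_PERIODIC_TIMER 5	/* seconds, as in vhba_os_def.h */

	/*
	 * Mirrors the clamping in ib_link_down(): timeouts under 5 seconds
	 * mark the link dead immediately, anything over 60 is capped at 60.
	 * Returns the number of WQ_PERIODIC_TIMER ticks ib_link_dead_poll()
	 * counts down before declaring VHBA_IB_DEAD, or 0 for "dead now".
	 */
	static int link_down_ticks(unsigned int ib_timeout)
	{
		if (ib_timeout > 60)
			ib_timeout = 60;
		if (ib_timeout < 5)
			return 0;
		return ib_timeout / WQ_PERIODIC_TIMER;
	}

	int main(void)
	{
		assert(link_down_ticks(3) == 0);	/* too short: dead at once */
		assert(link_down_ticks(30) == 6);	/* six 5-second polls */
		assert(link_down_ticks(90) == 12);	/* capped at 60 seconds */
		return 0;
	}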
+
+void dump_iocb(struct cmd_type_7 *cmd_pkt)
+{
+       pr_alert("IOCB Data:\n");
+       pr_alert("Entry Type: 0x%x\tEntry Count: 0x%x\n",
+                cmd_pkt->entry_type, cmd_pkt->entry_count);
+       pr_alert("IOCB Handle: 0x%x\n", cmd_pkt->handle);
+       pr_alert("N_Port Handle: 0x%x\n", cmd_pkt->nport_handle);
+       pr_alert("Data Segment Count: 0x%x\tFCP_LUN: 0x%x\n",
+                cmd_pkt->dseg_count, cmd_pkt->lun[0]);
+       pr_alert("Task (Operation): 0x%x\tTotal Data Byte Count: 0x%x\n",
+                cmd_pkt->task_mgmt_flags, cmd_pkt->byte_count);
+       pr_alert("Target ID (Port ID): [0]: 0x%x\t[1]: 0x%x\t[2]: 0x%x\n",
+                cmd_pkt->port_id[0], cmd_pkt->port_id[1], cmd_pkt->port_id[2]);
+       pr_alert("VP Index: 0x%x\tData Segment Length: 0x%x\n",
+                cmd_pkt->vp_index, cmd_pkt->dseg_0_len);
+       pr_alert("Data Segment Address: 0x%x_%x\n",
+                cmd_pkt->dseg_0_address[1], cmd_pkt->dseg_0_address[0]);
+       pr_alert("\n");
+}
+
+/*
+ * Used by SAN boot.
+ * Returns 1 if at least one disc is up.
+ * Returns 0 if no discs are ready.
+ */
+int vhba_check_discs_status(void)
+{
+       struct virtual_hba *vhba = NULL;
+
+       read_lock_bh(&vhba_global_lock);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               if (vhba->ha->discs_ready_flag) {
+                       read_unlock_bh(&vhba_global_lock);
+                       dprintk(TRC_ERRORS, vhba,
+                               "vhba_check_discs_status: found 1 disc Up\n");
+                       return 1;
+               }
+       }
+       read_unlock_bh(&vhba_global_lock);
+       dprintk(TRC_ERRORS, NULL, "vhba_check_discs_status: no disc is Up\n");
+       return 0;
+}
+EXPORT_SYMBOL(vhba_check_discs_status);
+
+/*
+ * Used by SAN boot.
+ * Returns the number of VHBAs created.
+ */
+int check_number_of_vhbas_provisioned(void)
+{
+       return atomic_read(&vhba_count);
+}
+EXPORT_SYMBOL(check_number_of_vhbas_provisioned);
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_main.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_main.c
new file mode 100644 (file)
index 0000000..0a112bd
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * vhba_main.c
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+
+#include "vhba_os_def.h"
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+
+#include "xscore.h"
+#include "vhba_xsmp.h"
+#include "xsmp_session.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define DRIVER_VERSION "0.5.1"
+#else
+#define DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+#define DRIVER_VERSION_STRING "Xsigo Virtual HBA Driver for Linux v"   \
+                                                       DRIVER_VERSION
+#define VHBA_MAJOR                             0
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN VHBA Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+/***********************************
+ * Module parameters: starts here  *
+ ***********************************/
+
+int cmds_per_lun = 16;
+module_param(cmds_per_lun, int, S_IRWXU);
+
+int vhba_multiple_q = 1;
+module_param(vhba_multiple_q, int, S_IRWXU);
+
+int vhba_max_transfer_size = VHBA_DEFAULT_TRANSFER_SIZE;
+module_param(vhba_max_transfer_size, int, S_IRWXU);
+
+int vhba_max_q_depth = VHBA_MAX_VH_Q_DEPTH;
+module_param(vhba_max_q_depth, int, S_IRWXU);
+
+int vhba_debug = 0x200001;
+module_param(vhba_debug, int, 0644);
+
+int vhba_max_scsi_retry = VHBA_MAX_SCSI_RETRY;
+module_param(vhba_max_scsi_retry, int, S_IRWXU);
+
+int vhba_default_scsi_timeout = VHBA_DEFAULT_SCSI_TIMEOUT;
+module_param(vhba_default_scsi_timeout, int, S_IRWXU);
+
+int vhba_wait_in_boot = 1;
+module_param(vhba_wait_in_boot, int, 0644);
+
+int vhba_wait_per_vhba = 30;
+module_param(vhba_wait_per_vhba, int, 0644);
+
+int vhba_abort_recovery_count = 15;    /* 15*2 = 30 seconds */
+module_param(vhba_abort_recovery_count, int, 0644);
+
+/***********************************
+ * Module parameters: ends here    *
+ ***********************************/
+struct scsi_transport_template *vhba_transport_template;
+unsigned long vhba_wait_time;
+char vhba_version_str[40];
+int vhba_xsmp_service_id;
+int vhba_max_dsds_in_fmr;
+int vhba_max_fmr_pages;
+
+DEFINE_IDR(vhba_idr_table);
+
+u32 vhba_current_idr = MAX_VHBAS;
+atomic_t vhba_count;
+rwlock_t vhba_global_lock;
+
+struct virtual_hba vhba_g;
+struct vhba_xsmp_stats vhba_xsmp_stats;
+
+static const struct file_operations vhba_fops = {
+       .read    = vhba_read,
+       .write   = vhba_write,
+       .open    = vhba_open,
+       .release = vhba_release,
+};
+
+int vhba_wait_all_vhbas_up(void)
+{
+       int time, delayms = 200;
+       int vhba_count = 0;
+
+       /* Wait up to vhba_wait_per_vhba seconds (default 30) */
+       dprintk(TRC_INIT, NULL, "%s Checking VHBA state\n", __func__);
+
+       for (time = 0; time < vhba_wait_per_vhba * 1000; time += delayms) {
+               vhba_count = check_number_of_vhbas_provisioned();
+               if (vhba_count > 0) {
+                       dprintk(TRC_INIT, NULL, "%s Found %d vhbas\n",
+                               __func__, vhba_count);
+                       break;
+               }
+               msleep(delayms);
+       }
+
+       if (vhba_count <= 0) {
+               dprintk(TRC_INIT, NULL, "%s Found 0 vhbas\n", __func__);
+
+               return 0;
+       }
+
+       /* Wait for 100 seconds */
+       for (time = 0; time < 500; time++) {
+               if (vhba_check_discs_status()) {
+                       dprintk(TRC_INIT, NULL, "%s Found disc status\n",
+                               __func__);
+                       return 1;
+               }
+               msleep(delayms);
+       }
+
+       return 0;
+}
+
+static void vhba_wait_for_vhbas(void)
+{
+       unsigned long wait_time = jiffies;
+
+       if (vhba_wait_in_boot && xscore_wait_for_sessions(0)) {
+               pr_info("XSVHBA: Waiting for VHBAs to come up...\n");
+               if (vhba_wait_all_vhbas_up()) {
+                       dprintk(TRC_INIT, NULL,
+                               "%s VHBAs are ready with discs\n", __func__);
+               } else {
+                       dprintk(TRC_INIT, NULL,
+                               "%s VHBAs are NOT ready with discs\n",
+                               __func__);
+               }
+       }
+       vhba_wait_time = jiffies - wait_time;
+}
+
+int dev_major;
+
+/*
+ * vhba_module_init - Module initialization.
+ */
+static int __init vhba_module_init(void)
+{
+       dprintk(TRC_INIT, NULL, "%s\n", DRIVER_VERSION_STRING);
+       dprintk(TRC_INIT, NULL, "Driver queue depth is %d\n", cmds_per_lun);
+       dprintk(TRC_INIT, NULL, "Driver max transfer size is %dKB\n",
+               vhba_max_transfer_size / 2);
+       dprintk(TRC_INIT,
+               NULL, "\nBuild date: " __DATE__ " @ " __TIME__ "\n\n");
+
+       /* Probably needs to be added to the regular linux driver */
+       vhba_transport_template =
+           fc_attach_transport(&vhba_transport_functions);
+
+       vhbawq_init();
+       vhbawq_queue();
+
+       rwlock_init(&vhba_global_lock);
+       INIT_LIST_HEAD(&vhba_g.list);
+
+       /* Register with XCPM module for receiving XSMP messages */
+       if (vhba_register_xsmp_service()) {
+               eprintk(NULL, "vhba_register_xsmp_service() failed!\n");
+               goto init_failed;
+       }
+
+       if (vhba_create_procfs_root_entries()) {
+               eprintk(NULL, "vhba_create_procfs_root_entries() failed!\n");
+               vhba_unregister_xsmp_service();
+               goto init_failed;
+       }
+
+       /* register a character interface here... */
+       dev_major = register_chrdev(VHBA_MAJOR, "svhba", &vhba_fops);
+
+       if (dev_major < 0) {
+               dprintk(TRC_ERRORS,
+                       NULL, "char device registration failed for vhba\n");
+               eprintk(NULL, "register chrdev() failed\n");
+               vhba_unregister_xsmp_service();
+               vhba_remove_procfs_root_entries();
+               goto init_failed;
+       }
+       /* Wait for vhba's to come up */
+       vhba_wait_for_vhbas();
+       return 0;
+
+init_failed:
+       fc_release_transport(vhba_transport_template);
+       return -1;
+}
+
+/*
+ * vhba_module_exit - Module cleanup routine.
+ */
+static void __exit vhba_module_exit(void)
+{
+       struct virtual_hba *vhba;
+       struct virtual_hba *tmp_vhba;
+
+       vhba_unregister_xsmp_service();
+
+       vhbawq_cleanup();
+
+       list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+               if (vhba->cfg)
+                       wake_up_interruptible(&vhba->timer_wq);
+               vhba_delete(vhba->resource_id);
+       }
+       vhba_remove_procfs_root_entries();
+
+       if (dev_major >= 0)
+               unregister_chrdev(dev_major, "svhba");
+
+       fc_release_transport(vhba_transport_template);
+
+       dprintk(0, NULL, "Xsigo Virtual HBA driver is unloaded\n");
+}
+
+ssize_t vhba_read(struct file *filp, char *buf, size_t size, loff_t *offp)
+{
+       return 0;
+}
+
+ssize_t vhba_write(struct file *filp, const char *buf, size_t size,
+                  loff_t *offp)
+{
+       return 0;
+}
+
+int vhba_open(struct inode *inode, struct file *filp)
+{
+       int minor;
+
+       minor = MINOR(inode->i_rdev);
+       return 0;
+}
+
+int vhba_release(struct inode *inode, struct file *filp)
+{
+       int minor;
+
+       minor = MINOR(inode->i_rdev);
+       return 0;
+}
+
+/*
+ * Called from thread context
+ */
+static void vhba_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+       struct virtual_hba *vhba, *tmp_vhba;
+       unsigned long flags = 0;
+
+       switch (event) {
+       case XSCORE_CONN_CONNECTED:
+               read_lock_bh(&vhba_global_lock);
+               list_for_each_entry(vhba, &vhba_g.list, list) {
+                       if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl))
+                               vhba->xsmp_hndl = xsmp_hndl;
+               }
+               read_unlock_bh(&vhba_global_lock);
+               break;
+       case XSCORE_DEVICE_REMOVAL:
+               read_lock_irqsave(&vhba_global_lock, flags);
+               list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+                       if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+                               read_unlock_irqrestore(&vhba_global_lock,
+                                                      flags);
+                               (void)vhba_delete(vhba->resource_id);
+                               read_lock_irqsave(&vhba_global_lock, flags);
+                       }
+               }
+               read_unlock_irqrestore(&vhba_global_lock, flags);
+               break;
+               /* At present we don't need to worry about any other cases */
+       case XSCORE_PORT_UP:
+       case XSCORE_PORT_DOWN:
+       default:
+               break;
+       }
+}
+
+int vhba_register_xsmp_service(void)
+{
+       struct xsmp_service_reg_info service_info = {
+               .receive_handler = vhba_receive_handler,
+               .event_handler = vhba_xsmp_event_handler,
+               .ctrl_message_type = XSMP_MESSAGE_TYPE_VHBA,
+               .resource_flag_index = RESOURCE_FLAG_INDEX_VHBA
+       };
+
+       vhba_xsmp_service_id = xcpm_register_service(&service_info);
+       if (vhba_xsmp_service_id < 0) {
+               eprintk(NULL, "Unable to register with XCPM\n");
+               return -1;
+       }
+       return 0;
+}
+
+void vhba_unregister_xsmp_service(void)
+{
+       int ret = 0;
+
+       ret = xcpm_unregister_service(vhba_xsmp_service_id);
+       if (ret != 0)
+               eprintk(NULL, "Unable to unregister from XCPM %d\n", ret);
+       else
+               dprintk(TRC_INIT, NULL, "Completed xcpm unregister\n");
+}
+
+module_init(vhba_module_init);
+module_exit(vhba_module_exit);
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_os_def.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_os_def.h
new file mode 100644 (file)
index 0000000..0fce758
--- /dev/null
@@ -0,0 +1,1187 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_OSDEF_H__
+#define __VHBA_OSDEF_H__
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/idr.h>
+#include <linux/dma-mapping.h>
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <linux/dmapool.h>
+#include <linux/spinlock.h>
+#include <linux/completion.h>
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_eh.h>
+
+#include <scsi/scsi_transport_fc.h>
+
+#include <rdma/ib_verbs.h>
+#include "xscore.h"
+#include "xsmp_common.h"
+
+#define xg_spin_lock_irqsave(lock, flags)                      \
+       spin_lock_irqsave(lock, flags)
+
+#define xg_spin_unlock_irqrestore(lock, flags)                 \
+       spin_unlock_irqrestore(lock, flags)
+
+#define xg_spin_lock_irq(lock)                                 \
+       spin_lock_irq(lock)
+
+#define xg_spin_unlock_irq(lock)                               \
+       spin_unlock_irq(lock)
+
+#define IB_WC_MSG_LEN                   (128+8)
+
+#define VHBA_DEFAULT_SCSI_TIMEOUT              60      /* 60 seconds */
+#define EXTEND_CMD_TIMEOUT                     80      /* 80 seconds */
+#define IB_CMD_TIMEOUT_DELTA           3       /* 3 seconds */
+#define DEFER_LIST_TIMEOUT                70   /* 70 seconds */
+#define WQ_PERIODIC_TIMER                      5       /* 5 seconds */
+#define PERIODIC_DEFER_CNT             (DEFER_LIST_TIMEOUT/WQ_PERIODIC_TIMER)
+
+#define VHBA_MAX_SCSI_RETRY                    60
+#define FORCE_FLUSH_DEFE_LIST                  1
+#define NO_FORCE_FLUSH_DEFE_LIST               0
+
+#define INVALID_FIELD_IN_CDB          0x24
+
+#define VHBA_STATE_NOT_ACTIVE           0
+#define VHBA_STATE_ACTIVE               1
+#define VHBA_STATE_SCAN                 2
+#define VHBA_STATE_BUSY                        3
+
+#define VHBA_QID_ENABLE                                1
+#define VHBA_MAX_VH_Q_DEPTH                    16
+#define VHBA_MAX_VH_Q_COUNT                    4
+
+/* SCSI maximum CDB size */
+#define MAX_CMDSZ                              16
+#define MAX_OUTSTANDING_COMMANDS               1024
+#define MAX_IO_DESCRIPTORS                     32
+#define MAX_FIBRE_TARGETS                      128
+#define MAX_FIBRE_LUNS                    256
+#define MAX_FIBRE_LUNS_MORE               256
+
+#define MAX_BUSES                              1
+#define MAX_TARGETS                            MAX_FIBRE_TARGETS
+#define MAX_VHBA_QUEUES                                4
+
+#define REQUEST_ENTRY_CNT_24XX         1024    /* Number of request entries */
+
+/*
+ * Status entry SCSI status bit definitions
+ */
+/* Reserved bits BIT_12-BIT_15 */
+#define SS_MASK                                                0xfff
+#define SS_RESIDUAL_UNDER                              BIT_11
+#define SS_RESIDUAL_OVER                               BIT_10
+#define SS_SENSE_LEN_VALID                             BIT_9
+#define SS_RESPONSE_INFO_LEN_VALID                     BIT_8
+
+#define SS_RESERVE_CONFLICT                            (BIT_4 | BIT_3)
+#define SS_BUSY_CONDITION                              BIT_3
+#define SS_CONDITION_MET                               BIT_2
+#define SS_CHECK_CONDITION                             BIT_1
+
+/*
+ * Status entry completion status
+ */
+#define CS_COMPLETE                    0x0     /* No errors */
+#define CS_INCOMPLETE                  0x1     /* Incomplete transfer of cmd */
+#define CS_DMA                         0x2     /* A DMA direction error. */
+#define CS_TRANSPORT                   0x3     /* Transport error */
+#define CS_RESET                       0x4     /* SCSI bus reset occurred */
+#define CS_ABORTED                     0x5     /* System aborted command */
+#define CS_TIMEOUT                     0x6     /* Timeout error */
+#define CS_DATA_OVERRUN                        0x7     /* Data overrun */
+
+#define CS_DATA_UNDERRUN               0x15    /* Data Underrun */
+#define CS_QUEUE_FULL                  0x1C    /* Queue Full */
+#define CS_PORT_UNAVAILABLE            0x28    /* Port unavailable */
+                                               /* (selection timeout) */
+#define CS_PORT_LOGGED_OUT             0x29    /* Port Logged Out */
+#define CS_PORT_CONFIG_CHG             0x2A    /* Port Configuration Changed */
+#define CS_PORT_BUSY                   0x2B    /* Port Busy */
+#define CS_COMPLETE_CHKCOND            0x30    /* Error? */
+#define CS_BAD_PAYLOAD                 0x80    /* Driver defined */
+#define CS_UNKNOWN                     0x81    /* Driver defined */
+#define CS_RETRY                       0x82    /* Driver defined */
+#define CS_LOOP_DOWN_ABORT             0x83    /* Driver defined */
+
+#define WWN_SIZE                               8
+
+#define LINK_DOWN                              0
+#define LINK_UP                                        1
+#define LINK_DEAD                              2
+
+#define TGT_LOST                               1
+#define TGT_FOUND                              0
+#define TGT_DEAD                               2
+
+#define LUN_ID_SCHEME
+
+struct xt_cm_private_data {
+       u64 vid;
+       u16 qp_type;
+       u16 max_ctrl_msg_size;
+       u32 data_qp_type;
+} __packed;
+
+struct xg_scsi_lun {
+       u8 scsi_lun[8];
+};
+
+struct _vhba_init_ {
+       u8 port_id[3];
+       u8 vp_index;
+       u16 n_port_handle;
+       u16 lun;
+};
+
+struct vhba_io_cmd {
+       u8 cmd[MAX_CMDSZ];
+       u32 cmd_len;
+       u8 *buf[6];
+       u32 buf_size[6];
+};
+
+#define ADD_VHBA                               1
+#define DELETE_VHBA                            2
+#define SEND_NOP                               3
+#define SEND_DISK_READ                         4
+#define SEND_DISK_WRITE                                5
+#define SET_LID                                        6
+
+union target_id {
+       u16 extended;
+       struct {
+               u8 reserved;
+               u8 standard;
+       } id;
+};
+
+#define COMMAND_TYPE_7  0x18
+struct cmd_type_7 {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 sys_define;          /* System defined */
+       u8 entry_status;        /* Entry Status */
+
+       u32 handle;             /* System handle */
+
+       u16 nport_handle;       /* N_PORT handle */
+       u16 timeout;            /* Command timeout */
+#define FW_MAX_TIMEOUT          0x1999
+
+       u16 dseg_count;         /* Data segment count */
+       u16 reserved_1;
+
+       u8 lun[8];              /* FCP LUN (BE) */
+
+       u16 task_mgmt_flags;    /* Task management flags */
+
+#define TMF_CLEAR_ACA           BIT_14
+#define TMF_TARGET_RESET        BIT_13
+#define TMF_LUN_RESET           BIT_12
+#define TMF_CLEAR_TASK_SET      BIT_10
+#define TMF_ABORT_TASK_SET      BIT_9
+#define TMF_READ_DATA           BIT_1
+#define TMF_WRITE_DATA          BIT_0
+
+       u8 task;
+#define TSK_SIMPLE              0
+#define TSK_HEAD_OF_QUEUE       1
+#define TSK_ORDERED             2
+#define TSK_ACA                 4
+#define TSK_UNTAGGED            5
+
+       u8 crn;
+
+       u8 fcp_cdb[MAX_CMDSZ];  /* SCSI command words */
+       u32 byte_count;         /* Total byte count */
+
+       u8 port_id[3];          /* PortID of destination port */
+       u8 vp_index;
+
+       u32 dseg_0_address[2];  /* Data segment 0 address */
+       u32 dseg_0_len;         /* Data segment 0 length  */
+
+       u32 rkey1;              /* Xg extensions to IOCBS  */
+       u32 rkey2;              /* to accommodate           */
+       u32 rkey3;              /* rkeys for dsds          */
+       u32 rkey4;
+       u32 rkey5;
+
+       u32 xg_rsvd[11];
+};
+
+#define CONTINUE_A64_TYPE       0x0A   /* Continuation A64 entry  */
+struct cont_a64_entry {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 sys_define;          /* System defined */
+       u8 entry_status;        /* Entry Status */
+       u32 dseg_0_address[2];  /* Data segment 0 address */
+       u32 dseg_0_length;      /* Data segment 0 length  */
+       u32 dseg_1_address[2];  /* Data segment 1 address */
+       u32 dseg_1_length;      /* Data segment 1 length  */
+       u32 dseg_2_address[2];  /* Data segment 2 address */
+       u32 dseg_2_length;      /* Data segment 2 length  */
+       u32 dseg_3_address[2];  /* Data segment 3 address */
+       u32 dseg_3_length;      /* Data segment 3 length  */
+       u32 dseg_4_address[2];  /* Data segment 4 address */
+       u32 dseg_4_length;      /* Data segment 4 length  */
+
+       u32 rkey1;              /* Xg extensions to IOCBS */
+       u32 rkey2;              /* to accommodate          */
+       u32 rkey3;              /* rkeys for dsds         */
+       u32 rkey4;
+       u32 rkey5;
+
+       u32 xg_rsvd[11];
+};
+
+#define STATUS_TYPE     0x03   /* Status entry */
+struct sts_entry_24xx {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 sys_define;          /* System defined */
+       u8 entry_status;        /* Entry Status */
+
+       u32 handle;             /* System handle */
+
+       u16 comp_status;        /* Completion status */
+       u16 ox_id;              /* OX_ID used by the firmware */
+
+       u32 residual_len;       /* Residual transfer length */
+
+       u16 reserved_1;
+       u16 state_flags;        /* State flags */
+#define SF_TRANSFERRED_DATA     BIT_11
+#define SF_FCP_RSP_DMA          BIT_0
+
+       u16 reserved_2;
+       u16 scsi_status;        /* SCSI status */
+#define SS_CONFIRMATION_REQ             BIT_12
+
+       u32 rsp_residual_count; /* FCP RSP residual count */
+
+       u32 sense_len;          /* FCP SENSE length */
+       u32 rsp_data_len;       /* FCP response data length */
+
+       u8 data[28];            /* FCP response/sense information */
+};
+
+/*
+* Status entry completion status
+*/
+#define CS_DATA_REASSEMBLY_ERROR 0x11  /* Data Reassembly Error */
+#define CS_ABTS_BY_TARGET        0x13  /* Target send ABTS to abort IOCB */
+#define CS_FW_RESOURCE           0x2C  /* Firmware Resource Unavailable */
+#define CS_TASK_MGMT_OVERRUN     0x30  /* Task management overrun (8+) */
+#define CS_ABORT_BY_TARGET       0x47  /* Abort By Target */
+
+#define STATUS_CONT_TYPE         0x10  /* Status continuation entry */
+struct sts_cont_entry {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 sys_define;          /* System defined */
+       u8 entry_status;        /* Entry Status */
+       u8 data[60];            /* data */
+};
+
+#define MARKER_TYPE     0x04   /* Marker entry */
+struct mrk_entry_24xx {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 handle_count;        /* Handle count */
+       u8 entry_status;        /* Entry Status */
+
+       u32 handle;             /* System handle */
+
+       u16 nport_handle;       /* N_PORT handle */
+
+       u8 modifier;            /* Modifier (7-0) */
+#define MK_SYNC_ID_LUN  0      /* Synchronize ID/LUN */
+#define MK_SYNC_ID      1      /* Synchronize ID */
+#define MK_SYNC_ALL     2      /* Synchronize all ID/LUN */
+       u8 reserved_1;
+
+       u8 reserved_2;
+       u8 vp_index;
+
+       u16 reserved_3;
+
+       u8 lun[8];              /* FCP LUN (BE) */
+       u8 reserved_4[40];
+};
+
+struct response {
+       u8 data[60];
+       u32 signature;
+#define RESPONSE_PROCESSED      0xDEADDEAD     /* Signature */
+};
+
+#define ABORT_IOCB_TYPE 0x33
+struct abort_entry_24xx {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 handle_count;        /* Handle count */
+       u8 entry_status;        /* Entry Status */
+
+       u32 handle;             /* System handle */
+
+       u16 nport_handle;       /* N_PORT handle */
+       /* or Completion status */
+
+       u16 options;            /* Options */
+#define AOF_NO_ABTS             BIT_0  /* Do not send any ABTS */
+
+       u32 handle_to_abort;    /* System handle to abort */
+
+       u8 reserved_1[32];
+
+       u8 port_id[3];          /* PortID of destination port */
+       u8 vp_index;
+
+       u8 reserved_2[12];
+};
+
+#define TSK_MGMT_IOCB_TYPE      0x14
+struct tsk_mgmt_entry {
+       u8 entry_type;          /* Entry type */
+       u8 entry_count;         /* Entry count */
+       u8 handle_count;        /* Handle count */
+       u8 entry_status;        /* Entry Status */
+
+       u32 handle;             /* System handle */
+
+       u16 nport_handle;       /* N_PORT handle */
+
+       u16 reserved_1;
+
+       u16 delay;              /* Activity delay in seconds */
+
+       u16 timeout;            /* Command timeout */
+
+       u8 lun[8];              /* FCP LUN (BE) */
+
+       u32 control_flags;      /* Control Flags */
+#define TCF_NOTMCMD_TO_TARGET   BIT_31
+#define TCF_LUN_RESET           BIT_4
+#define TCF_ABORT_TASK_SET      BIT_3
+#define TCF_CLEAR_TASK_SET      BIT_2
+#define TCF_TARGET_RESET        BIT_1
+#define TCF_CLEAR_ACA           BIT_0
+
+       u8 reserved_2[20];
+
+       u8 port_id[3];          /* PortID of destination port */
+       u8 vp_index;
+
+       u8 reserved_3[12];
+};
+
+struct scsi_xg_vhba_host;
+
+#define MAX_VHBAS  32
+
+/* Messages on Data QP */
+#define INIT_BLOCK            0x1
+#define WRITE_INDEX_UPDATE    0x2
+#define RING_UPDATE           0x5
+
+#define READ_INDEX_UPDATE     0x7
+
+/* Messages on Control QP */
+#define ENABLE_VHBA_Q       0x1
+#define DISABLE_VHBA_Q      0x2
+#define TGT_RESET           0x3
+#define LINK_RESET          0x4
+#define ABORT_CMD                      0x5
+#define LUN_RESET                      0x6
+
+#define ENABLE_RSP            0x7
+#define DISC_INFO_UPDATE      0x8
+#define DISC_INFO_CONT_UPDATE 0x9
+#define PLINK_STATUS_UPDATE   0xA
+#define TGT_STATUS_UPDATE     0xB
+#define VHBA_HEART_BEAT       0x13     /* 0xC ~ 0x12 reserved for FC HBA API */
+
+/* 24 bit FC port id ... */
+union port_id {
+       u32 b24:24;
+
+       struct {
+               u8 d_id[3];
+               u8 rsvd_1;
+       } r;
+
+       struct {
+               u8 al_pa;
+               u8 area;
+               u8 domain;
+               u8 rsvd_1;
+       } b;
+};
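
An illustrative aside, not part of the patch: a user-space sketch of how a 24-bit FC address overlays the domain/area/al_pa bytes. The overlay shown assumes a little-endian host, where the low byte of b24 lands in al_pa; the address value is hypothetical.

	#include <assert.h>
	#include <stdint.h>

	/* Same layout as the driver's union port_id, with userland types. */
	union port_id {
		uint32_t b24:24;
		struct {
			uint8_t al_pa;	/* low byte of the 24-bit address */
			uint8_t area;	/* middle byte */
			uint8_t domain;	/* high byte */
			uint8_t rsvd_1;
		} b;
	};

	int main(void)
	{
		/* Hypothetical FC address 0x010203: domain 01, area 02, al_pa 03. */
		union port_id id = { .b24 = 0x010203 };

		assert(id.b.domain == 0x01);
		assert(id.b.area == 0x02);
		assert(id.b.al_pa == 0x03);
		return 0;
	}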
+
+/* Ring related structures ... */
+struct init_block {
+       u8 type;
+       u8 entry_size;
+       u16 ring_size;
+       u32 _reserved;
+
+       u32 read_index_rkey;
+       u32 base_addr_rkey;
+
+       u64 read_index_addr;
+       u64 base_addr;
+};
+
+struct enable_msg {
+       u8 type;
+       u8 rsvd;
+       u8 rsvd1;
+       u8 rsvd2;
+       u64 resource_id;
+};
+
+struct heart_beat_msg {
+       u8 type;
+       u8 rsvd;
+       u8 rsvd1;
+       u8 rsvd2;
+       u64 resource_id;
+};
+
+struct enable_rsp {
+       u8 type;
+       u8 rsvd;
+       u8 rsvd1;
+       u8 vp_index;
+       u32 rsvd2;
+       u64 resource_id;
+};
+
+struct vhba_link_status {
+       u8 type;
+       u8 _reserved1;
+       u16 phy_link_status;
+       u32 conn_down_timeout;
+};
+
+struct tgt_info {
+       u16 lun_count;
+       u16 loop_id;
+       u32 persistent_binding;
+       u32 port_id;
+       u8 media_type;
+       u8 _reserved[3];
+       u8 wwpn[WWN_SIZE];
+       u8 lun_map[MAX_FIBRE_LUNS >> 3];
+       u16 lun_ids[MAX_FIBRE_LUNS];
+       u8 wwnn[WWN_SIZE];
+};
+
+struct vhba_discovery_msg {
+       u8 type;
+       u8 _reserved1;
+       u16 queue_number;
+       u16 target_count;
+       u16 cont_count;
+       /* Tgts (at the most 1 struct tgt_info) */
+       struct tgt_info tgt_data[1];
+       u32 fcid;
+};
+
+struct vhba_discovery_cont_msg {
+       u8 type;
+       u8 seg_num;
+       u16 target_count;
+       /* Tgts (at the most 1 struct tgt_info) */
+       struct tgt_info tgt_data[1];
+};
+
+struct vhba_write_index_msg {
+       u8 type;
+       u8 _reserved1;
+       u16 write_index;
+       u32 _reserved;
+};
+
+struct vhba_tgt_status_msg {
+       u8 type;
+       u8 media_type;
+       u8 rscn_addr_format;
+       u8 flag;
+       u16 loop_id;
+       u16 _reserved3;
+       u8 wwpn[WWN_SIZE];
+       u32 port_id;
+       u32 persistent_binding;
+       u16 lun_count;
+       u16 _reserved4;
+       u8 lun_map[MAX_FIBRE_LUNS >> 3];
+       u16 lun_ids[MAX_FIBRE_LUNS];
+       u8 wwnn[WWN_SIZE];
+       u32 port_down_timeout;
+};
+
+struct vhba_abort_cmd {
+       u8 type;
+       u8 _reserved1;
+       u8 _reserved2;
+       u8 _reserved3;
+       u16 vhba_id;
+       u16 _reserved4;
+       u32 handle_to_abort;
+       u8 port_id[3];
+       u8 _reserved5;
+};
+
+struct vhba_lun_reset_msg {
+       u8 type;
+       u8 _reserved1;
+       u8 _reserved2;
+       u8 flag;
+       u16 vhba_id;
+       u16 lun;
+       u8 wwpn[WWN_SIZE];
+};
+
+struct vhba_tgt_reset_msg {
+       u8 type;
+       u8 _reserved1;
+       u8 _reserved2;
+       u8 flag;
+       u16 vhba_id;
+       u16 _reserved3;
+       u8 wwpn[WWN_SIZE];
+};
+
+struct vhba_link_reset_msg {
+       u8 type;
+       u8 _reserved1;
+       u16 vhba_id;
+};
+
+#define MAX_VHBA_MSG_SIZE sizeof(struct init_block)
+#define MAX_VHBA_NAME_SIZE 16
+#define MAX_CHASSIS_NAME_SIZE 32
+#define MAX_SESSION_NAME_SIZE 32       /* Server Profile Name Size */
+
+#define BIT_0   0x1
+#define BIT_1   0x2
+#define BIT_2   0x4
+#define BIT_3   0x8
+#define BIT_4   0x10
+#define BIT_5   0x20
+#define BIT_6   0x40
+#define BIT_7   0x80
+#define BIT_8   0x100
+#define BIT_9   0x200
+#define BIT_10  0x400
+#define BIT_11  0x800
+#define BIT_12  0x1000
+#define BIT_13  0x2000
+#define BIT_14  0x4000
+#define BIT_15  0x8000
+
+#define LSB(x)  ((u8)(x))
+#define MSB(x)  ((u8)((u16)(x) >> 8))
+
+#define LSW(x)  ((u16)(x))
+#define MSW(x)  ((u16)((u32)(x) >> 16))
+
+#define LSD(x)        ((u32)((u64)(x)))
+#define MSD(x)        ((u32)((((u64)(x)) >> 16) >> 16))
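
A self-checking sketch (illustrative, not part of the patch) of what these extraction macros yield, with <stdint.h> types standing in for the kernel's u8/u16/u32/u64:

	#include <assert.h>
	#include <stdint.h>

	/* The driver's extraction macros, with userland types. */
	#define LSB(x)  ((uint8_t)(x))
	#define MSB(x)  ((uint8_t)((uint16_t)(x) >> 8))
	#define LSW(x)  ((uint16_t)(x))
	#define MSW(x)  ((uint16_t)((uint32_t)(x) >> 16))
	#define LSD(x)  ((uint32_t)((uint64_t)(x)))
	#define MSD(x)  ((uint32_t)((((uint64_t)(x)) >> 16) >> 16))

	int main(void)
	{
		uint64_t x = 0x1122334455667788ULL;

		assert(LSB(x) == 0x88);		/* low byte of the low word */
		assert(MSB(x) == 0x77);		/* high byte of the low word */
		assert(LSW(x) == 0x7788);	/* low 16 bits */
		assert(MSW(x) == 0x5566);	/* bits 16..31 */
		assert(LSD(x) == 0x55667788);	/* low 32 bits */
		assert(MSD(x) == 0x11223344);	/* high 32 bits */
		return 0;
	}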
+
+#define CMD_SP(cmnd)            ((cmnd)->SCp.ptr)
+#define CMD_COMPL_STATUS(cmnd)  ((cmnd)->SCp.this_residual)
+#define CMD_RESID_LEN(cmnd)     ((cmnd)->SCp.buffers_residual)
+#define CMD_SCSI_STATUS(cmnd)   ((cmnd)->SCp.Status)
+#define CMD_ACTUAL_SNSLEN(cmnd) ((cmnd)->SCp.Message)
+#define CMD_ENTRY_STATUS(cmnd)  ((cmnd)->SCp.have_data_in)
+
+#define DEC_REF_CNT(x) do { \
+                               if (atomic_dec_and_test(&x->ref_cnt)) { \
+                                       wake_up(&x->delete_wq); \
+                               } \
+                       } while (0)
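
Illustrative aside, not part of the patch: DEC_REF_CNT() is the usual "last reference drop wakes the waiter" pattern; the delete path presumably sleeps on delete_wq until the count reaches zero. A user-space analogue using C11 atomics:

	#include <assert.h>
	#include <stdatomic.h>

	struct obj {
		atomic_int ref_cnt;
		int woke_deleter;	/* stands in for wake_up(&x->delete_wq) */
	};

	/* Analogue of DEC_REF_CNT(x): only the thread dropping the final
	 * reference signals whoever is waiting to tear the object down. */
	static void dec_ref_cnt(struct obj *x)
	{
		if (atomic_fetch_sub(&x->ref_cnt, 1) == 1)
			x->woke_deleter = 1;
	}

	int main(void)
	{
		struct obj o = { .ref_cnt = 2, .woke_deleter = 0 };

		dec_ref_cnt(&o);
		assert(!o.woke_deleter);	/* one reference still live */
		dec_ref_cnt(&o);
		assert(o.woke_deleter);		/* last drop wakes the deleter */
		return 0;
	}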
+
+/**
+ * host_to_fcp_swap() - byte-swap a buffer into FCP (big-endian) order
+ * @fcp: buffer to swap in place
+ * @bsize: buffer size in bytes; only whole 32-bit words are swapped
+ *
+ * Returns the swapped buffer pointer.
+ */
+static inline u8 *host_to_fcp_swap(u8 *fcp, u32 bsize)
+{
+       u32 *ifcp = (u32 *) fcp;
+       u32 *ofcp = (u32 *) fcp;
+       u32 iter = bsize >> 2;
+
+       for (; iter; iter--)
+               *ofcp++ = swab32(*ifcp++);
+
+       return fcp;
+}
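
For illustration only (not part of the patch): the same swab loop in user space, with __builtin_bswap32 standing in for the kernel's swab32(). Only whole 32-bit words are swapped, which is why callers pass 4-byte-multiple sizes such as the 8-byte FCP LUN.

	#include <assert.h>
	#include <stdint.h>

	/* Same loop as the kernel helper; __builtin_bswap32 replaces swab32. */
	static uint8_t *host_to_fcp_swap(uint8_t *fcp, uint32_t bsize)
	{
		uint32_t *p = (uint32_t *)fcp;
		uint32_t iter = bsize >> 2;	/* whole 32-bit words only */

		for (; iter; iter--, p++)
			*p = __builtin_bswap32(*p);
		return fcp;
	}

	int main(void)
	{
		_Alignas(uint32_t) uint8_t lun[8] = {
			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
		};

		host_to_fcp_swap(lun, sizeof(lun));
		/* Each 4-byte word is reversed in place. */
		assert(lun[0] == 0x03 && lun[3] == 0x00);
		assert(lun[4] == 0x07 && lun[7] == 0x04);
		return 0;
	}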
+
+#define VHBA_IO_STATE_ACTIVE           0
+#define VHBA_IO_STATE_ABORTING         1
+#define VHBA_IO_STATE_ABORTED          2
+#define VHBA_IO_STATE_ABORT_FAILED     3
+#define VHBA_IO_STATE_ABORT_NEEDED     4
+#define VHBA_IO_STATE_TIMEDOUT         5
+#define VHBA_IO_STATE_RESET            6
+
+#define SRB_STATE_NO_DEFER_LIST        0
+#define SRB_STATE_DEFER_LIST   1
+
+struct srb {
+       struct list_head list;
+
+       struct scsi_xg_vhba_host *ha;   /* HA the SP is queued on */
+       struct scsi_cmnd *cmd;  /* Linux SCSI command pkt */
+       struct timer_list timer;        /* Command timer */
+       u16 flags;
+
+       /* Request state */
+       u16 state;
+
+       /* Target/LUN queue pointers. */
+       struct os_tgt *tgt_queue;
+       struct os_lun *lun_queue;
+
+       /* Single transfer DMA context */
+       dma_addr_t dma_handle;
+
+       u32 request_sense_length;
+       u8 *request_sense_ptr;
+       u32 queue_num;
+
+       /* Suspend delay */
+       int delay;
+
+       u32 tot_dsds;
+
+       void *pool_fmr[6];
+
+       /* Raw completion info for use by failover ? */
+       u8 fo_retry_cnt;        /* Retry count this request */
+       u8 err_id;              /* error id */
+#define SRB_ERR_PORT    1      /* Request failed -- "port down" */
+#define SRB_ERR_LOOP    2      /* Request failed -- "loop down" */
+#define SRB_ERR_DEVICE  3      /* Request failed -- "device error" */
+#define SRB_ERR_OTHER   4
+
+       int iocb_handle;
+       void *unaligned_sg;
+       int use_copy;
+       void *bounce_buffer;
+       int bounce_buf_len;
+       int use_sg_orig;
+       struct scatterlist *lcl_sg;
+       int lcl_sg_cnt;
+       int abort_cnt;
+
+       u16 error_flag;         /* if page_list allocation fails */
+};
+
+#define MAX_SRB_SIZE sizeof(struct srb)
+
+/*
+* SRB flag definitions
+*/
+#define SRB_TIMEOUT             BIT_0  /* Command timed out */
+#define SRB_DMA_VALID           BIT_1  /* Command sent to ISP */
+#define SRB_WATCHDOG            BIT_2  /* Command on watchdog list */
+#define SRB_ABORT_PENDING       BIT_3  /* Command abort sent to device */
+
+#define SRB_ABORTED             BIT_4  /* Command aborted command already */
+#define SRB_RETRY               BIT_5  /* Command needs retrying */
+#define SRB_GOT_SENSE           BIT_6  /* Command has sense data */
+#define SRB_FAILOVER            BIT_7  /* Command in failover state */
+
+#define SRB_BUSY                BIT_8  /* Command is in busy retry state */
+#define SRB_FO_CANCEL           BIT_9  /* Command don't need to do failover */
+#define SRB_IOCTL               BIT_10 /* IOCTL command. */
+#define SRB_TAPE                BIT_11 /* FCP2 (Tape) command. */
+
+/*
+* SRB state definitions
+*/
+#define SRB_FREE_STATE          0      /*   returned back */
+#define SRB_PENDING_STATE       1      /*   queued in LUN Q */
+#define SRB_ACTIVE_STATE        2      /*   in Active Array */
+#define SRB_DONE_STATE          3      /*   queued in Done Queue */
+#define SRB_RETRY_STATE         4      /*   in Retry Queue */
+#define SRB_SUSPENDED_STATE     5      /*   in suspended state */
+#define SRB_NO_QUEUE_STATE      6      /*   is in between states */
+#define SRB_ACTIVE_TIMEOUT_STATE 7     /*   in Active Array but timed out */
+#define SRB_FAILOVER_STATE      8      /*   in Failover Queue */
+#define SRB_SCSI_RETRY_STATE    9      /*   in Scsi Retry Queue */
+
+struct vhba_ib_stats {
+       u64 cqp_dn_cnt;
+       u64 cqp_up_cnt;
+       u64 cqp_send_err_cnt;
+       u64 cqp_recv_err_cnt;
+       u64 cqp_remote_disconn_cnt;
+       u64 dqp_dn_cnt;
+       u64 dqp_up_cnt;
+       u64 dqp_send_err_cnt;
+       u64 dqp_recv_err_cnt;
+       u64 dqp_remote_disconn_cnt;
+       u64 curr_outstanding_reqs;
+       u64 total_req_q_fulls;
+       u64 total_outstding_q_wraps;
+} __packed;
+
+struct vhba_xsmp_stats {
+       u64 install_msg_cnt;
+       u64 delete_msg_cnt;
+       u64 update_msg_cnt;
+       u64 cfg_stats_msg_cnt;
+       u64 clr_stats_msg_cnt;
+       u64 sync_begin_msg_cnt;
+       u64 sync_end_msg_cnt;
+       u64 oper_req_msg_cnt;
+       u64 unknown_msg_cnt;
+       u64 xt_state_dn_cnt;
+       u64 tca_lid_changed_cnt;
+       u64 abort_all_cnt;
+       u64 boot_msg_cnt;
+       u64 last_unknown_msg;
+       u64 last_msg;
+} __packed;
+
+struct vhba_io_stats {
+       u64 total_read_reqs;
+       u64 total_write_reqs;
+       u64 total_task_mgmt_reqs;
+       u64 total_read_mbytes;
+       u64 total_write_mbytes;
+       u64 total_io_rsp;
+       u64 total_copy_ios;
+       u64 total_copy_page_allocs;
+       u64 total_copy_page_frees;
+       atomic_t vh_q_full_cnt[VHBA_MAX_VH_Q_COUNT];
+       atomic_t num_vh_q_reqs[VHBA_MAX_VH_Q_COUNT];
+       u64 qcmd_busy_ret_cnt;
+} __packed;
+
+struct vhba_fmr_stats {
+       u64 map_cnt;
+       u64 unmap_cnt;
+       u64 map_fail_cnt;
+       u64 unaligned_io_cnt;
+       u64 unaligned_ptr_cnt;
+       u64 total_fmr_ios;
+} __packed;
+
+struct vhba_fc_stats {
+       u64 link_dn_cnt;
+       u64 link_dead_cnt;
+       u64 link_up_cnt;
+       u64 rscn_up_cnt;
+       u64 rscn_dn_cnt;
+       u64 rscn_dead_cnt;
+       u64 rscn_multiple_up_cnt;
+       u64 rscn_multiple_dn_cnt;
+       u64 last_up_tgt;
+       u64 last_dn_tgt;
+       u64 last_dead_tgt;
+       u64 disc_info_cnt;
+       u64 enable_resp_cnt;
+       u64 enable_msg_cnt;
+} __packed;
+
+struct vhba_scsi_stats {
+       u64 invalid_tgt_cnt;
+       u64 invalid_lun_cnt;
+       u64 abort_success_cnt;
+       u64 abort_fail_cnt;
+       u64 dev_reset_success_cnt;
+       u64 dev_reset_fail_cnt;
+       u64 bus_reset_success_cnt;
+       u64 bus_reset_fail_cnt;
+       u64 host_reset_success_cnt;
+       u64 host_reset_fail_cnt;
+} __packed;
+
+struct vhba_ha_stats {
+       struct vhba_ib_stats ib_stats;
+       struct vhba_io_stats io_stats;
+       struct vhba_fmr_stats fmr_stats;
+       struct vhba_fc_stats fc_stats;
+       struct vhba_scsi_stats scsi_stats;
+} __packed;
+
+#define VHBA_NAME_LEN               16
+#define VHBA_LVM_NAME_LEN           128
+#define VHBA_MAX_BOOT_DEV           6
+#define VHBA_MAX_MOUNT_DEV          6
+#define VHBA_MOUNT_OPT_LEN          32
+
+struct host_san_mount_lvm {
+       u8 logical_vol_group[VHBA_LVM_NAME_LEN];
+       u8 logical_vol[VHBA_LVM_NAME_LEN];
+};
+
+struct host_san_vhba_list_sts {
+       u8 vh_name[VHBA_NAME_LEN];
+       u64 wwn;
+       u16 lun;
+       u8 tgt_num;             /* target number to expose */
+};
+
+union xg_tgt_wwpn {
+       u8 wwpn_t[WWN_SIZE];
+       u64 wwpn_val;
+};
+
+struct scsi_xg_vhba_host {
+       struct list_head list;
+       u8 host_str[16];
+       atomic_t vhba_flags;
+       struct vhba_ha_stats stats;
+       struct virtual_hba *vhba;
+       int vhba_num;
+
+       struct proc_dir_entry *vhba_proc;
+       struct proc_dir_entry *vhba_proc_target;
+
+       u8 vhba_name[MAX_VHBA_NAME_SIZE];
+
+       u64 tca_guid;
+       u16 tca_lid;
+
+       /* SCSI Info */
+       struct Scsi_Host *host;
+       unsigned long host_no;
+       unsigned long instance;
+       u16 max_tgt_id;
+       u16 max_luns;
+       u16 max_targets;
+       u32 target_count;
+       struct srb *status_srb;
+       u32 lun_count;
+       struct list_head disc_ports;
+       /* OS target queue pointers */
+       struct os_tgt *otgt[MAX_FIBRE_TARGETS];
+
+       struct {
+               u32 init_done:1;
+               u32 online:1;
+               u32 reset_active:1;
+               u32 process_response_queue:1;
+               u32 enable_target_reset:1;
+       } flags;
+
+       /* Boot info */
+       u16 boot_count;
+       struct host_san_vhba_list_sts sanboot[VHBA_MAX_BOOT_DEV];
+
+       /* Mount info */
+       u16 mount_count;
+       struct host_san_vhba_list_sts sanmount[VHBA_MAX_MOUNT_DEV];
+       u16 mount_type;         /* 1 = logical vol
+                                  2 = direct mount
+                                  0 = vhba */
+
+       /* name of direct mount device: ex: /dev/sdb */
+       u8 direct_mount_dev[VHBA_LVM_NAME_LEN];
+
+       /* logical volume group and logical volume */
+       struct host_san_mount_lvm host_lvm_info;
+
+       /* mount options */
+       u8 mount_options[VHBA_MOUNT_OPT_LEN];
+
+       u8 discs_ready_flag;
+
+       /* IB Info */
+       u64 resource_id;
+       struct ib_link_info *link;
+       u32 control_qp_handle;
+       u32 control_qpn;
+       u32 data_qp_handle;
+       u32 data_qpn;
+       struct xt_cm_private_data ctrl_pvt;
+       struct xt_cm_private_data data_pvt;
+       atomic_t qp_status;
+       struct init_block init_blk;
+       struct vhba_write_index_msg *send_write_index_msg;
+
+       u32 max_cont_segs;
+
+       u8 sync_flag;
+
+       /* QL Info */
+       u32 vp_index;
+       u16 revision;
+       u8 ports;
+
+       /* FMR */
+       void *fmr_pool;
+       void *request_ring_fmr;
+       void *rindex_fmr;
+       void *scratch;
+
+       atomic_t link_state;
+       u32 device_flags;
+
+#define SRB_MIN_REQ     128
+
+       atomic_t dqp_send_buf_consumed;
+
+       /* Req ring lock, rings, and indexes */
+       dma_addr_t request_dma; /* Physical address */
+       struct cmd_type_7 *request_ring;        /* Base virtual address */
+       struct cmd_type_7 *request_ring_ptr;    /* Current address */
+       u16 req_ring_rindex_dummy;      /* Current index */
+       s16 req_ring_windex;    /* Current index */
+       u16 req_q_cnt;          /* Number of available entries */
+       u16 request_q_length;
+       dma_addr_t req_ring_rindex_dma;
+       u32 *req_ring_rindex;
+
+       /* Outstanding commands */
+       struct srb *outstanding_cmds[MAX_OUTSTANDING_COMMANDS];
+       u32 current_outstanding_cmd;
+       void *send_buf_ptr[REQUEST_ENTRY_CNT_24XX];
+       struct ib_wc recv_buf_ptr[64];
+
+       /* Defer list */
+       struct list_head defer_list;
+       atomic_t defer_cnt;
+       atomic_t periodic_def_cnt;
+       atomic_t ib_link_down_cnt;
+       atomic_t ib_status;
+
+       /* Lock order: First hold host_lock before holding list_lock */
+       spinlock_t list_lock ____cacheline_aligned;
+       spinlock_t io_lock ____cacheline_aligned;
+};
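
Not part of the patch, a brief illustration: the lock-order comment above is the standard defense against ABBA deadlock. A minimal pthread sketch of the rule, using hypothetical locks rather than the driver's actual ones:

	#include <pthread.h>

	static pthread_mutex_t host_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	/*
	 * Every path takes host_lock before list_lock. If any path took
	 * them in the opposite order, two threads could each hold one
	 * lock while waiting for the other: the classic ABBA deadlock.
	 */
	static void walk_disc_ports(void)
	{
		pthread_mutex_lock(&host_lock);
		pthread_mutex_lock(&list_lock);
		/* ... traverse the port list safely ... */
		pthread_mutex_unlock(&list_lock);
		pthread_mutex_unlock(&host_lock);
	}

	int main(void)
	{
		walk_disc_ports();
		return 0;
	}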
+
+struct xsvhba_conn {
+       u8 type;
+       int state;
+       struct xscore_conn_ctx ctx;
+};
+
+struct xsvhba_work {
+       struct work_struct work;
+       xsmp_cookie_t xsmp_hndl;
+       struct virtual_hba *vhba;
+       u8 *msg;
+       u32 idr;
+       int len;
+       int status;
+};
+
+struct virtual_hba {
+       struct scsi_xg_vhba_host *ha;
+       struct vhba_xsmp_msg *cfg;
+       struct list_head list;
+       wait_queue_head_t timer_wq;
+       wait_queue_head_t delete_wq;
+
+       struct xsvhba_conn ctrl_conn;
+       struct xsvhba_conn data_conn;
+       struct xsmp_session_info xsmp_info;
+       xsmp_cookie_t xsmp_hndl;
+
+       atomic_t ref_cnt;
+       atomic_t vhba_state;
+       atomic_t reconnect_flag;
+       u32 idr;
+       int sync_needed;
+       int reconn_try_cnt;
+       int reconn_attempt;
+       int qp_count;
+       u64 cs_timeout_count;
+       atomic_t abort_count;
+       int qp_poll_count;
+       int heartbeat_count;
+       u64 resource_id;
+       int scanned_once;
+       int scan_reqd;
+       int xg_init_done;
+       struct proc_dir_entry *admin_down_proc;
+       struct work_struct work;
+};
+
+
+#define TQF_ONLINE              0      /* Device online to OS */
+#define TQF_SUSPENDED           1
+#define TQF_RETRY_CMDS          2
+
+#define VHBA_ALLOC_FMR          0x40
+#define VHBA_NO_TARGET_STATE    0x200
+#define VHBA_ADMIN_DOWN_STATE   0x400
+
+#define VHBA_DATA_QP            0x1
+#define VHBA_CONTROL_QP         0x2
+#define VHBA_BOTH_QP            0x3
+
+#define VHBA_READY              0
+#define VHBA_DRAINING           1
+#define VHBA_ABORTING           2
+#define VHBA_DELETING           3
+#define VHBA_DELETED            4
+
+#define FCS_UNCONFIGURED        1
+#define FCS_DEVICE_DEAD         2
+#define FCS_DEVICE_LOST         3
+#define FCS_ONLINE              4
+#define FCS_NOT_SUPPORTED       5
+
+struct os_lun {
+       struct fc_lun *fclun;   /* FC LUN context pointer */
+       u32 lun_id;
+
+       unsigned long q_flag;
+
+       u_long q_timeout;       /* total command timeouts */
+       atomic_t q_timer;       /* suspend timer */
+       u32 q_count;            /* current count */
+       u32 q_max;              /* maximum count lun can be suspended */
+       u8 q_state;             /* lun State */
+
+       u_long io_cnt;          /* total xfer count since boot */
+       u_long out_cnt;         /* total outstanding IO count */
+       u_long w_cnt;           /* total writes */
+       u_long r_cnt;           /* total reads */
+       u_long avg_time;        /* average I/O time */
+};
+
+struct os_tgt {
+       /* LUN context pointer */
+       struct os_lun *olun[MAX_FIBRE_LUNS_MORE];
+       struct fc_port *fcport;
+       unsigned long flags;
+       struct scsi_xg_vhba_host *ha;
+
+       /* Persistent binding information */
+       union port_id d_id;
+       u8 node_name[WWN_SIZE];
+       u8 port_name[WWN_SIZE];
+       u8 init_done;
+       atomic_t ncmds;
+       u16 state;
+};
+
+#define FCF_TAPE_PRESENT       BIT_0
+struct fc_port {
+       struct list_head list;
+       struct list_head fcluns;
+
+       u8 node_name[WWN_SIZE];
+       u8 port_name[WWN_SIZE];
+       union port_id d_id;
+       u16 loop_id;
+
+       u8 port_type;
+
+       atomic_t state;
+       u32 flags;
+
+       struct os_tgt *tgt_queue;
+       u16 os_target_id;
+
+       u8 device_type;
+       u8 unused;
+
+       u8 bound;
+       u16 lun_count;
+
+       u8 lun_map[MAX_FIBRE_LUNS >> 3];
+       u16 lun_ids[MAX_FIBRE_LUNS];
+       u32 persistent_binding;
+
+       struct fc_rport *rport;
+       u32 supported_classes;
+
+};
+
+struct fc_lun {
+       struct list_head list;
+
+       u16 lun;
+       atomic_t state;
+       u8 device_type;
+
+       u8 max_path_retries;
+       u32 flags;
+};
+
+#define TGT_Q(ha, t)       (ha->otgt[t])
+#define LUN_Q(ha, t, l)    (TGT_Q(ha, t)->olun[l])
+#define GET_LU_Q(ha, t, l)     \
+       ((TGT_Q(ha, t) != NULL) ? TGT_Q(ha, t)->olun[l] : NULL)
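
Of these accessors, LUN_Q dereferences the target queue unconditionally and
oopses if target t was never discovered, while GET_LU_Q degrades to NULL; a
short sketch of the safe form on an I/O path:

	/* Sketch: prefer the NULL-safe accessor when the target may be
	 * absent. */
	static bool vhba_lun_present(struct scsi_xg_vhba_host *ha,
				     u32 t, u32 l)
	{
		return GET_LU_Q(ha, t, l) != NULL;
	}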
+
+extern struct virtual_hba vhba_g;
+extern struct idr vhba_idr_table;
+extern rwlock_t vhba_global_lock;
+extern u32 vhba_current_idr;
+extern atomic_t vhba_count;
+extern struct workqueue_struct *vhba_workqueuep;
+
+#define MAX_LUNS 0xffff
+
+#endif /* __VHBA_OSDEF_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_proc.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_proc.c
new file mode 100644 (file)
index 0000000..51a8402
--- /dev/null
@@ -0,0 +1,845 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+#include <linux/proc_fs.h>
+#include <linux/delay.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_ib.h"
+#include "vhba_defs.h"
+
+int limit = PAGE_SIZE;
+
+int force_sp_copy;
+
+struct proc_dir_entry *proc_root_vhba;
+struct proc_dir_entry *proc_root_vhba_dev;
+struct proc_dir_entry *proc_root_vhba_targ;
+
+static int vhba_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_debug(struct file *file,
+                                    const char __user *buffer, size_t count,
+                                    loff_t *offp);
+static int vhba_proc_open_debug(struct inode *inode, struct file *file);
+static int vhba_proc_read_force_copy(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_force_copy(struct file *file,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *offp);
+static int vhba_proc_open_force_copy(struct inode *inode, struct file *file);
+static int vhba_proc_read_device(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_device(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *offp);
+static int vhba_proc_open_device(struct inode *inode, struct file *file);
+static int vhba_proc_read_target(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_target(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *offp);
+static int vhba_proc_open_target(struct inode *inode, struct file *file);
+static int vhba_proc_read_san_info(struct seq_file *m, void *data);
+static ssize_t vhba_proc_write_san_info(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp);
+static int vhba_proc_open_san_info(struct inode *inode, struct file *file);
+
+static const struct file_operations vhba_debug_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = vhba_proc_open_debug,
+       .read = seq_read,
+       .write = vhba_proc_write_debug,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations vhba_force_copy_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = vhba_proc_open_force_copy,
+       .read = seq_read,
+       .write = vhba_proc_write_force_copy,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations vhba_device_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = vhba_proc_open_device,
+       .read = seq_read,
+       .write = vhba_proc_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations vhba_target_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = vhba_proc_open_target,
+       .read = seq_read,
+       .write = vhba_proc_write_target,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations vhba_san_info_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = vhba_proc_open_san_info,
+       .read = seq_read,
+       .write = vhba_proc_write_san_info,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static int vhba_proc_read_device(struct seq_file *m, void *data)
+{
+       static const char * const qp_sts_str[] = {
+               "VHBA_QP_NOTCONNECTED",
+               "VHBA_QP_TRYCONNECTING",
+               "VHBA_QP_RECONNECTING",
+               "VHBA_QP_PARTIAL_CONNECT",
+               "VHBA_QP_CONNECTED",
+               "ERROR"
+       };
+
+       static const char * const vhba_sts_str[] = {
+               "VHBA_STATE_NOT_ACTIVE",
+               "VHBA_STATE_ACTIVE",
+               "VHBA_STATE_SCAN",
+               "VHBA_STATE_BUSY",
+               "ERROR"
+       };
+
+       struct virtual_hba *vhba;
+       int *pint;
+       struct scsi_xg_vhba_host *ha;
+       struct vhba_xsmp_msg *cfg;
+       int link_state;
+       u64 wwn;
+
+       vhba = (struct virtual_hba *)
+           vhba_get_context_by_idr((u32) (unsigned long)m->private);
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+               return 0;
+       }
+       ha = vhba->ha;
+       if (atomic_read(&ha->vhba_flags) != VHBA_READY) {
+               dprintk(TRC_PROC, vhba,
+                       "VHBA not in ready state to display valid information!\n");
+               goto out;
+       }
+       cfg = vhba->cfg;
+
+       seq_puts(m, "VHBA Information\n");
+       seq_puts(m, "----------------\n");
+       seq_printf(m, "Symbolic Name\t\t\t: %s\n", (char *)(cfg->vh_name));
+       seq_printf(m, "Chassis Name\t\t\t: %s\n", vhba->xsmp_info.chassis_name);
+       seq_printf(m, "Chassis Version\t\t\t: %x\n", vhba->xsmp_info.version);
+       seq_printf(m, "Server-Profile Name\t\t: %s\n",
+                  vhba->xsmp_info.session_name);
+
+       seq_printf(m, "Bootable\t\t\t: %s\n",
+                  (cfg->vhba_flag & 0x1) ? "Yes" : "No");
+       seq_printf(m,
+                  "VHBA state\t\t\t: %s\n",
+                  vhba_sts_str[atomic_read(&vhba->vhba_state)]);
+       seq_puts(m, "Link State\t\t\t: ");
+       link_state = atomic_read(&ha->link_state);
+       switch (link_state) {
+       case 0:
+               seq_puts(m, "LINK_DOWN\n");
+               break;
+       case 1:
+               seq_puts(m, "LINK_UP\n");
+               break;
+       case 2:
+               seq_puts(m, "LINK_DEAD\n");
+               break;
+       default:
+               seq_puts(m, "UNKNOWN\n");
+       }
+       seq_puts(m, "IB Status\t\t\t: ");
+       switch (atomic_read(&ha->ib_status)) {
+       case 0:
+               seq_puts(m, "IB_UP\n");
+               break;
+       case 1:
+               seq_puts(m, "IB_DOWN\n");
+               break;
+       case 2:
+               seq_puts(m, "IB_DEAD\n");
+               break;
+       default:
+               seq_puts(m, "UNKNOWN\n");
+       }
+       seq_printf(m, "Reconnect Attempts\t\t: %d\n",
+                  (int)vhba->reconn_attempt);
+       seq_printf(m, "Cumulative QP Count\t\t: %d\n", (int)vhba->qp_count);
+       seq_printf(m, "Lun masking\t\t\t: %s\n",
+                  vhba->cfg->lunmask_enable ? "Enabled" : "Disabled");
+       seq_printf(m, "Host Number\t\t\t: %u\n", (unsigned)ha->host_no);
+       seq_printf(m, "Target count\t\t\t: %llu\n", (u64) ha->target_count);
+       wwn = (u64) (vhba->cfg)->wwn;
+       seq_puts(m, "Port WWN\t\t\t:\n");
+       seq_printf(m,
+                       "%02X:%02X:%02X:%02X:%02X:%02X:%02X:%02X\n",
+                       (u8) (wwn & 0xff), (u8) ((wwn >> 8) & 0xff),
+                       (u8) ((wwn >> 16) & 0xff), (u8) ((wwn >> 24) & 0xff),
+                       (u8) ((wwn >> 32) & 0xff), (u8) ((wwn >> 40) & 0xff),
+                       (u8) ((wwn >> 48) & 0xff), (u8) ((wwn >> 56) & 0xff));
+
+       seq_printf(m, "Scan Required\t\t\t: %d\n", vhba->scan_reqd);
+       seq_printf(m, "SCSI Max Retry count\t\t: %d\n", vhba_max_scsi_retry);
+       seq_puts(m, "\n");
+
+       vhba_print_xsmp_stats(m, ha);
+
+       seq_puts(m, "\n");
+       seq_puts(m, "VHBA Infiniband Information\n");
+       seq_puts(m, "---------------------------\n");
+       seq_printf(m, "Remote IB LID\t\t\t: 0x%x\n", be16_to_cpu(cfg->tca_lid));
+       pint = (int *)&cfg->tca_guid;
+       seq_printf(m, "Remote IB GUID\t\t\t: 0x%x%x\n",
+                  be32_to_cpu(*pint), be32_to_cpu(*(pint + 1)));
+       seq_printf(m, "Resource ID\t\t\t: 0x%Lx\n", cfg->resource_id);
+       seq_printf(m, "CQP handle/qpn\t\t\t: 0x%x/%u\n",
+                  ha->control_qp_handle, ha->control_qpn);
+       seq_printf(m, "DQP handle/qpn\t\t\t: 0x%x/%u\n",
+                  ha->data_qp_handle, ha->data_qpn);
+       seq_printf(m, "QP status\t\t\t: %s\n",
+                  qp_sts_str[atomic_read(&ha->qp_status)]);
+       seq_printf(m, "Driver ref count\t\t: %d\n",
+                  atomic_read(&vhba->ref_cnt));
+       seq_puts(m, "\n");
+
+       vhba_print_ib_stats(m, ha);
+       seq_puts(m, "\n");
+
+       vhba_print_io_stats(m, ha);
+       seq_puts(m, "\n");
+
+       /*XXX this all needs to go into different stats proc files
+        *       The vmkernel helpers don't do multipage returns, so
+        *         each /proc entry can only be less than 4K, 3K? */
+       vhba_print_fmr_stats(m, ha);
+       seq_puts(m, "\n");
+
+       vhba_print_fc_stats(m, ha);
+       seq_puts(m, "\n");
+
+       vhba_print_scsi_stats(m, ha);
+       seq_puts(m, "\n");
+out:
+       DEC_REF_CNT(vhba);
+       return 0;
+}
+
+static ssize_t vhba_proc_write_device(struct file *file,
+                                     const char __user *buffer, size_t count,
+                                     loff_t *offp)
+{
+       struct virtual_hba *vhba;
+       int newval = 0;
+       void *data = PDE_DATA(file_inode(file));
+       int ret;
+
+       vhba = (struct virtual_hba *)
+               vhba_get_context_by_idr((u32) (unsigned long)data);
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+               return count;
+       }
+       /* Parse from user space; any value written clears the scan flag */
+       ret = kstrtoint_from_user(buffer, count, 0, &newval);
+       if (ret < 0) {
+               DEC_REF_CNT(vhba);
+               return ret;
+       }
+       vhba->scan_reqd = 0;
+       DEC_REF_CNT(vhba);
+       return count;
+}
+
+static int vhba_proc_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, vhba_proc_read_device, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_target(struct seq_file *m, void *data)
+{
+       struct virtual_hba *vhba;
+       int tgt, k;
+       struct scsi_xg_vhba_host *ha;
+       struct os_tgt *tq;
+
+       vhba = (struct virtual_hba *)
+           vhba_get_context_by_idr((u32) (unsigned long)m->private);
+
+       if (vhba == NULL) {
+               dprintk(TRC_PROC, NULL, "Error - Null vhba context!\n");
+               return 0;
+       }
+
+       ha = vhba->ha;
+       if (atomic_read(&ha->vhba_flags) != VHBA_READY) {
+               dprintk(TRC_PROC, NULL,
+                       "VHBA not in ready state to display valid information!\n");
+               goto out;
+       }
+
+       seq_puts(m, "VHBA Target Information\n");
+       seq_puts(m, "-----------------------\n\n");
+       seq_printf(m, "Host no\t\t\t\t: %u\n", (unsigned)ha->host_no);
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (!tq || (atomic_read(&tq->fcport->state) != FCS_ONLINE))
+                       continue;
+               seq_puts(m, "Target WWPN\t\t\t: ");
+               for (k = 0; k < WWN_SIZE; k++) {
+                       seq_printf(m, "%02x ", tq->port_name[k]);
+               }
+               seq_printf(m, "\nFC Port id\t\t\t: 0x%x\n", tq->d_id.b24);
+               seq_printf(m, "Bound\t\t\t\t: %d\n", tq->fcport->bound);
+               seq_printf(m, "ncmds\t\t\t\t: %d\n", atomic_read(&tq->ncmds));
+               seq_printf(m, "Lun Count\t\t\t: %d\n", tq->fcport->lun_count);
+               seq_printf(m, "N-Port Handle\t\t\t: 0x%x\n",
+                        tq->fcport->loop_id);
+               seq_printf(m, "Map Order\t\t\t: %d\n",
+                        tq->fcport->os_target_id);
+
+               seq_puts(m, "Lun id(s)\t\t\t:");
+               for (k = 0; k < tq->fcport->lun_count; k++) {
+                       if (k != 0) {
+                               seq_puts(m, ",");
+                       }
+                       seq_printf(m, " %d", tq->fcport->lun_ids[k]);
+               }
+               seq_puts(m, "\n\n");
+               seq_puts(m, "-------------------------\n\n");
+       }
+out:
+       DEC_REF_CNT(vhba);
+       return 0;
+}
+
+static ssize_t vhba_proc_write_target(struct file *file,
+                       const char __user *buffer, size_t count,
+                       loff_t *offp)
+{
+       /* Writes are accepted but ignored */
+       return count;
+}
+
+static int vhba_proc_open_target(struct inode *inode, struct file *file)
+{
+       return single_open(file, vhba_proc_read_target, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_san_info(struct seq_file *m, void *data)
+{
+       int j;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       read_lock(&vhba_global_lock);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               ha = vhba->ha;
+               dprintk(TRC_PROC, NULL,
+                       "Mount count = %d\tBoot count = %d\n",
+                       ha->mount_count, ha->boot_count);
+               for (j = 0; j < ha->boot_count; j++)
+                       seq_printf(m, "sanboot%d=%s:%d:%d\n",
+                                  j, ha->sanboot[j].vh_name,
+                                  ha->sanboot[j].tgt_num, ha->sanboot[j].lun);
+               seq_puts(m, "\n\n");
+               for (j = 0; j < ha->mount_count; j++)
+                       seq_printf(m, "sanmount%d=%s:%d:%d\n",
+                                  j, ha->sanmount[j].vh_name,
+                                  ha->sanmount[j].tgt_num,
+                                  ha->sanmount[j].lun);
+               if (ha->mount_type == 1)
+                       seq_printf(m, "sanmount%d=lvm:%s:%s\n",
+                                  j, ha->host_lvm_info.logical_vol_group,
+                                  ha->host_lvm_info.logical_vol);
+               else if (ha->mount_type == 2)
+                       seq_printf(m, "sanmount=%s\n", ha->direct_mount_dev);
+               if (ha->mount_options != NULL)
+                       seq_printf(m, "mount-opts:%s:%s\n",
+                                  (char *)vhba->cfg->vh_name,
+                                  ha->mount_options);
+       }
+       read_unlock(&vhba_global_lock);
+
+       return 0;
+}
+
+static ssize_t vhba_proc_write_san_info(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp)
+{
+       /* Writes are accepted but ignored */
+       return count;
+}
+
+static int vhba_proc_open_san_info(struct inode *inode, struct file *file)
+{
+       return single_open(file, vhba_proc_read_san_info, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_debug(struct seq_file *m, void *data)
+{
+       seq_printf(m, "Total wait time(secs): %ld\n", (vhba_wait_time / HZ));
+       seq_printf(m, "Debug bitmask: 0x%x\n", vhba_debug);
+       return 0;
+}
+
+static ssize_t vhba_proc_write_debug(struct file *file,
+                                    const char __user *buffer, size_t count,
+                                    loff_t *offp)
+{
+       int ret;
+       char *buf = (char *) __get_free_page(GFP_USER);
+
+       if (!buf)
+               return -ENOMEM;
+
+       /* Leave room for the terminating NUL */
+       if (count >= PAGE_SIZE) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+
+       ret = kstrtoint(buf, 0, &vhba_debug);
+       if (ret == 0)
+               ret = count;
+out:
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int vhba_proc_open_debug(struct inode *inode, struct file *file)
+{
+       return single_open(file, vhba_proc_read_debug, PDE_DATA(inode));
+}
+
+static int vhba_proc_read_force_copy(struct seq_file *m, void *data)
+{
+       seq_printf(m, "%d\n", force_sp_copy);
+       return 0;
+}
+
+static ssize_t vhba_proc_write_force_copy(struct file *file,
+                                         const char __user *buffer,
+                                         size_t count, loff_t *offp)
+{
+       int newval;
+       int ret;
+       char *buf = (char *) __get_free_page(GFP_USER);
+
+       if (!buf)
+               return -ENOMEM;
+
+       /* Leave room for the terminating NUL */
+       if (count >= PAGE_SIZE) {
+               ret = -EINVAL;
+               goto out;
+       }
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+
+       ret = kstrtoint(buf, 0, &newval);
+       if (ret == 0) {
+               if (newval == 0 || newval == 1) {       /* sanity check */
+                       force_sp_copy = newval;
+                       ret = count;
+               } else {
+                       ret = -EINVAL;
+               }
+       }
+out:
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int vhba_proc_open_force_copy(struct inode *inode, struct file *file)
+{
+       return single_open(file, vhba_proc_read_force_copy, PDE_DATA(inode));
+}
+
+int vhba_add_proc_entry(struct virtual_hba *vhba)
+{
+       struct proc_dir_entry *file;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       char name[35];
+
+       snprintf(name, sizeof(name), "%s.%llx",
+                (char *)vhba->cfg->vh_name, vhba->resource_id);
+       file = proc_create_data(name, S_IFREG, proc_root_vhba_dev,
+                               &vhba_device_proc_fops,
+                               (void *)(unsigned long)vhba->idr);
+       if (!file) {
+               eprintk(vhba, "Unable to create /proc entry\n");
+               return -1;
+       }
+       ha->vhba_proc = file;
+
+       return 0;
+}
+
+void vhba_remove_proc_entry(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       char name[35];
+
+       if (ha->vhba_proc) {
+               snprintf(name, sizeof(name), "%s.%llx",
+                        (char *)vhba->cfg->vh_name, vhba->resource_id);
+               remove_proc_entry(name, proc_root_vhba_dev);
+               ha->vhba_proc = NULL;
+       }
+}
+
+int vhba_add_target_proc_entry(struct virtual_hba *vhba)
+{
+       struct proc_dir_entry *file;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       int ret = 0;
+       char name[35];
+
+       snprintf(name, sizeof(name), "%s.%llx",
+                (char *)vhba->cfg->vh_name, vhba->resource_id);
+       file = proc_create_data(name, S_IFREG, proc_root_vhba_targ,
+                               &vhba_target_proc_fops,
+                               (void *)(unsigned long)vhba->idr);
+       if (!file) {
+               eprintk(vhba, "Unable to create /proc entry\n");
+               ret = -1;
+               goto add_target_proc_end;
+       }
+
+       ha->vhba_proc_target = file;
+
+add_target_proc_end:
+       return ret;
+}
+
+int vhba_remove_target_proc_entry(struct virtual_hba *vhba)
+{
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+       char name[35];
+
+       if (ha->vhba_proc_target) {
+               snprintf(name, sizeof(name), "%s.%llx",
+                        (char *)vhba->cfg->vh_name, vhba->resource_id);
+               remove_proc_entry(name, proc_root_vhba_targ);
+               ha->vhba_proc_target = NULL;
+       }
+       return 0;
+}
+
+int vhba_create_procfs_root_entries(void)
+{
+       struct proc_dir_entry *debug_file = NULL;
+       struct proc_dir_entry *force_copy_file = NULL;
+       struct proc_dir_entry *san_info = NULL;
+
+       proc_root_vhba = proc_root_vhba_dev = NULL;
+
+       proc_root_vhba = proc_mkdir("driver/xsvhba", NULL);
+       if (!proc_root_vhba) {
+               eprintk(NULL, "Unable to create /proc/driver/xsvhba\n");
+               return -1;
+       }
+
+       debug_file = proc_create_data("debug", S_IFREG, proc_root_vhba,
+                                     &vhba_debug_proc_fops, NULL);
+
+       force_copy_file = proc_create_data("force_copy", S_IFREG,
+                                          proc_root_vhba,
+                                          &vhba_force_copy_proc_fops, NULL);
+
+       san_info = proc_create_data("san-info", S_IFREG, proc_root_vhba,
+                                   &vhba_san_info_proc_fops, NULL);
+
+       proc_root_vhba_dev = proc_mkdir("devices", proc_root_vhba);
+       proc_root_vhba_targ = proc_mkdir("target_info", proc_root_vhba);
+
+       return 0;
+}
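
Together with vhba_add_proc_entry() and vhba_add_target_proc_entry() above,
this yields the following layout when every creation succeeds:

	/*
	 * /proc/driver/xsvhba/debug        - wait time and debug bitmask
	 * /proc/driver/xsvhba/force_copy   - force_sp_copy knob
	 * /proc/driver/xsvhba/san-info     - SAN boot/mount information
	 * /proc/driver/xsvhba/devices/<vh_name>.<resource_id>
	 * /proc/driver/xsvhba/target_info/<vh_name>.<resource_id>
	 */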
+
+void vhba_remove_procfs_root_entries(void)
+{
+       dprintk(TRC_PROC, NULL, "removing target_info proc entry\n");
+       if (proc_root_vhba_targ)
+               remove_proc_entry("target_info", proc_root_vhba);
+
+       dprintk(TRC_PROC, NULL, "removing devices proc entry\n");
+       if (proc_root_vhba_dev)
+               remove_proc_entry("devices", proc_root_vhba);
+
+       dprintk(TRC_PROC, NULL, "removing debug proc entry\n");
+       if (proc_root_vhba_dev)
+               remove_proc_entry("debug", proc_root_vhba);
+
+       dprintk(TRC_PROC, NULL, "removing san-info proc entry\n");
+       if (proc_root_vhba_dev)
+               remove_proc_entry("san-info", proc_root_vhba);
+
+       dprintk(TRC_PROC, NULL, "removing force copy proc entry\n");
+       if (proc_root_vhba_dev)
+               remove_proc_entry("force_copy", proc_root_vhba);
+
+       dprintk(TRC_PROC, NULL, "removing vhba proc entry\n");
+       if (proc_root_vhba)
+               remove_proc_entry("driver/xsvhba", NULL);
+}
+
+int vhba_print_io_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+
+       seq_puts(m, "VHBA I/O Statistics\n");
+       seq_puts(m, "-------------------\n");
+       seq_printf(m, "Read I/O Reqs\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_read_reqs);
+       seq_printf(m, "Write I/O Reqs\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_write_reqs);
+       seq_printf(m, "Task Mgmt Reqs\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_task_mgmt_reqs);
+       seq_printf(m, "CS_TIMEOUT Count\t\t: %llu\n",
+                  (u64) ha->vhba->cs_timeout_count);
+       seq_printf(m, "Abort Count\t\t\t: %llu\n",
+                  (u64) atomic_read(&ha->vhba->abort_count));
+       seq_printf(m, "Total I/O Rsps\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_io_rsp);
+       seq_printf(m, "Total copy I/Os\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_copy_ios);
+       seq_printf(m, "Total copy page allocs\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_copy_page_allocs);
+       seq_printf(m, "Total copy page frees\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_copy_page_frees);
+       for (i = 0; i < VHBA_MAX_VH_Q_COUNT; i++) {
+               seq_printf(m, "Pending reqs for VH queue-%-2d\t: %llu\n", i,
+                          (u64) atomic_read(&ha->stats.io_stats.
+                                            num_vh_q_reqs[i]));
+       }
+
+       seq_printf(m, "Curr outstding cmd\t\t: %llu\n",
+                  (u64) ha->current_outstanding_cmd);
+
+       seq_printf(m, "Bytes Read\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_read_mbytes);
+       seq_printf(m, "Bytes Written\t\t\t: %llu\n",
+                  (u64) ha->stats.io_stats.total_write_mbytes);
+
+       seq_printf(m, "Queue cmd busy return count\t: %llu\n",
+                  (u64) ha->stats.io_stats.qcmd_busy_ret_cnt);
+
+       return 0;
+}
+
+int vhba_print_ib_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+       struct ib_cntr {
+               char name[32];
+               u64 *cntr;
+       } ib_cntrs[] = {
+               { "CQP down", &ha->stats.ib_stats.cqp_dn_cnt },
+               { "CQP up", &ha->stats.ib_stats.cqp_up_cnt },
+               { "CQP send error", &ha->stats.ib_stats.cqp_send_err_cnt },
+               { "CQP receive error", &ha->stats.ib_stats.cqp_recv_err_cnt },
+               { "CQP remote disconnect",
+                 &ha->stats.ib_stats.cqp_remote_disconn_cnt },
+               { "DQP down", &ha->stats.ib_stats.dqp_dn_cnt },
+               { "DQP up", &ha->stats.ib_stats.dqp_up_cnt },
+               { "DQP send error", &ha->stats.ib_stats.dqp_send_err_cnt },
+               { "DQP receive error", &ha->stats.ib_stats.dqp_recv_err_cnt },
+               { "DQP remote disconnect",
+                 &ha->stats.ib_stats.dqp_remote_disconn_cnt },
+               { "Current outstanding reqs",
+                 &ha->stats.ib_stats.curr_outstanding_reqs },
+               { "Request queue full",
+                 &ha->stats.ib_stats.total_req_q_fulls },
+               { "Outstanding queue wraps",
+                 &ha->stats.ib_stats.total_outstding_q_wraps },
+       };
+
+       seq_puts(m, "VHBA IB Statistics\n");
+       seq_puts(m, "------------------\n");
+       for (i = 0; i < ARRAY_SIZE(ib_cntrs); i++) {
+               seq_printf(m, "%-24s\t: %llu\n",
+                          ib_cntrs[i].name, (u64) *(ib_cntrs[i].cntr));
+       }
+       return 0;
+}
+
+int vhba_print_xsmp_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+       struct xsmp_cntr {
+               char name[32];
+               u64 *cntr;
+       } xsmp_cntrs[] = {
+               { "install", &vhba_xsmp_stats.install_msg_cnt },
+               { "delete", &vhba_xsmp_stats.delete_msg_cnt },
+               { "update", &vhba_xsmp_stats.update_msg_cnt },
+               { "stats config", &vhba_xsmp_stats.cfg_stats_msg_cnt },
+               { "stats clear", &vhba_xsmp_stats.clr_stats_msg_cnt },
+               { "sync begin", &vhba_xsmp_stats.sync_begin_msg_cnt },
+               { "sync end", &vhba_xsmp_stats.sync_end_msg_cnt },
+               { "oper req", &vhba_xsmp_stats.oper_req_msg_cnt },
+               { "unknown xsmp", &vhba_xsmp_stats.unknown_msg_cnt },
+               { "xt state down", &vhba_xsmp_stats.xt_state_dn_cnt },
+               { "tca lid change", &vhba_xsmp_stats.tca_lid_changed_cnt },
+               { "abort all", &vhba_xsmp_stats.abort_all_cnt },
+       };
+
+       seq_puts(m, "VHBA XSMP Statistics\n");
+       seq_puts(m, "--------------------\n");
+       for (i = 0; i < ARRAY_SIZE(xsmp_cntrs); i++) {
+               seq_printf(m, "%-20s\t\t: %llu\n",
+                          xsmp_cntrs[i].name, (u64) *(xsmp_cntrs[i].cntr));
+       }
+       seq_printf(m, "Last unknown xsmp msg\t\t: %llu\n",
+                  (u64) vhba_xsmp_stats.last_unknown_msg);
+       seq_printf(m, "Last known xsmp msg\t\t: %llu\n",
+                  (u64) vhba_xsmp_stats.last_msg);
+       return 0;
+}
+
+int vhba_print_fmr_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+       struct fmr_cntr {
+               char name[32];
+               u64 *cntr;
+       } fmr_cntrs[] = {
+               { "FMR successful map", &ha->stats.fmr_stats.map_cnt },
+               { "FMR unmap", &ha->stats.fmr_stats.unmap_cnt },
+               { "FMR map fail", &ha->stats.fmr_stats.map_fail_cnt },
+               { "Unaligned i/o", &ha->stats.fmr_stats.unaligned_io_cnt },
+               { "Unaligned sg list ptr",
+                 &ha->stats.fmr_stats.unaligned_ptr_cnt },
+               { "FMR i/o", &ha->stats.fmr_stats.total_fmr_ios },
+       };
+
+       seq_puts(m, "VHBA FMR Statistics\n");
+       seq_puts(m, "-------------------\n");
+       for (i = 0; i < ARRAY_SIZE(fmr_cntrs); i++) {
+               seq_printf(m, "%-24s\t: %llu\n",
+                          fmr_cntrs[i].name, (u64) *(fmr_cntrs[i].cntr));
+       }
+       return 0;
+}
+
+int vhba_print_fc_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+       struct fc_cntr {
+               char name[32];
+               u64 *cntr;
+       } fc_cntrs[] = {
+               { "FC link down", &ha->stats.fc_stats.link_dn_cnt },
+               { "FC link dead", &ha->stats.fc_stats.link_dead_cnt },
+               { "FC link up", &ha->stats.fc_stats.link_up_cnt },
+               { "Target online RSCN", &ha->stats.fc_stats.rscn_up_cnt },
+               { "Target offline RSCN", &ha->stats.fc_stats.rscn_dn_cnt },
+               { "Target dead RSCN", &ha->stats.fc_stats.rscn_dead_cnt },
+               { "Dup RSCN for online tgt",
+                 &ha->stats.fc_stats.rscn_multiple_up_cnt },
+               { "Dup RSCN for offline tgt",
+                 &ha->stats.fc_stats.rscn_multiple_dn_cnt },
+               { "Last online target", &ha->stats.fc_stats.last_up_tgt },
+               { "Last dead target", &ha->stats.fc_stats.last_dead_tgt },
+               { "Last offline target", &ha->stats.fc_stats.last_dn_tgt },
+               { "Disc info msg received",
+                 &ha->stats.fc_stats.disc_info_cnt },
+               { "Enable resp msg received",
+                 &ha->stats.fc_stats.enable_resp_cnt },
+               { "Enable msg sent", &ha->stats.fc_stats.enable_msg_cnt },
+       };
+
+       seq_puts(m, "VHBA FC Statistics\n");
+       seq_puts(m, "------------------\n");
+       for (i = 0; i < ARRAY_SIZE(fc_cntrs); i++) {
+               seq_printf(m, "%-24s\t: %llu\n",
+                          fc_cntrs[i].name, (u64) *(fc_cntrs[i].cntr));
+       }
+       return 0;
+}
+
+int vhba_print_scsi_stats(struct seq_file *m, struct scsi_xg_vhba_host *ha)
+{
+       int i;
+       struct scsi_cntr {
+               char name[32];
+               u64 *cntr;
+       } scsi_cntrs[] = {
+               { "Invalid target", &ha->stats.scsi_stats.invalid_tgt_cnt },
+               { "Invalid lun", &ha->stats.scsi_stats.invalid_lun_cnt },
+               { "Successful abort",
+                 &ha->stats.scsi_stats.abort_success_cnt },
+               { "Failed abort", &ha->stats.scsi_stats.abort_fail_cnt },
+               { "Successful device reset",
+                 &ha->stats.scsi_stats.dev_reset_success_cnt },
+               { "Failed device reset",
+                 &ha->stats.scsi_stats.dev_reset_fail_cnt },
+               { "Successful bus reset",
+                 &ha->stats.scsi_stats.bus_reset_success_cnt },
+               { "Failed bus reset",
+                 &ha->stats.scsi_stats.bus_reset_fail_cnt },
+               { "Successful host reset",
+                 &ha->stats.scsi_stats.host_reset_success_cnt },
+               { "Failed host reset",
+                 &ha->stats.scsi_stats.host_reset_fail_cnt },
+       };
+
+       seq_puts(m, "VHBA SCSI Statistics\n");
+       seq_puts(m, "--------------------\n");
+       for (i = 0; i < ARRAY_SIZE(scsi_cntrs); i++) {
+               seq_printf(m, "%-24s\t: %llu\n", scsi_cntrs[i].name,
+                          (u64) *(scsi_cntrs[i].cntr));
+       }
+       return 0;
+}
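
A quick userspace sanity check against the entries created above might look
like the following; a hedged sketch, with paths taken from
vhba_create_procfs_root_entries() and error handling trimmed:

	#include <stdio.h>

	/* Minimal probe of the xsvhba proc interface. */
	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/driver/xsvhba/debug", "r");

		if (!f) {
			perror("xsvhba proc entry");
			return 1;
		}
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* wait time, debug bitmask */
		fclose(f);

		/* A new bitmask can be written back, e.g.:
		 *   echo 0x8 > /proc/driver/xsvhba/debug
		 */
		return 0;
	}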
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.c
new file mode 100644 (file)
index 0000000..1197388
--- /dev/null
@@ -0,0 +1,971 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/blkdev.h>
+#include <scsi/scsi_tcq.h>
+
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_align.h"
+#include "vhba_scsi_intf.h"
+
+#define XG_VHBA_VERSION "1.0.0"
+#define VHBA_ABORT_TIMEOUT 5
+#define VHBA_RESET_TIMEOUT 10
+
+static int xg_vhba_slave_configure(struct scsi_device *device);
+static int xg_vhba_eh_abort(struct scsi_cmnd *);
+static int xg_vhba_eh_device_reset(struct scsi_cmnd *);
+static int xg_vhba_eh_bus_reset(struct scsi_cmnd *);
+static int xg_vhba_eh_host_reset(struct scsi_cmnd *);
+
+struct info_str {
+       char *buffer;
+       int length;
+       off_t offset;
+       int pos;
+};
+
+static int xg_vhba_slave_configure(struct scsi_device *device)
+{
+       dprintk(TRC_FUNCS, NULL, "Entering...\n");
+
+       scsi_change_queue_depth(device, vhba_max_q_depth);
+
+       dprintk(TRC_FUNCS, NULL, "Returning\n");
+       return 0;
+}
+
+void xsigo_cmd_timeout(struct srb *sp)
+{
+       struct scsi_xg_vhba_host *ha;
+       struct scsi_cmnd *cmd;
+       struct virtual_hba *vhba;
+       int iocb_handle;
+       unsigned long flags = 0;
+
+       cmd = sp->cmd;
+       ha = sp->ha;
+       vhba = ha->vhba;
+
+       dprintk(TRC_SCSI_ERRS, vhba, "I/O timeout\n");
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+
+       atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[sp->queue_num]);
+
+       cmd->result = DID_ABORT << 16;
+       iocb_handle = sp->iocb_handle;
+       if (ha->outstanding_cmds[iocb_handle]) {
+               ha->outstanding_cmds[iocb_handle] = NULL;
+               CMD_SP(sp->cmd) = NULL;
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               complete_cmd_and_callback(vhba, sp, sp->cmd);
+               DEC_REF_CNT(vhba);
+       } else {
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+       }
+}
+
+static int xg_vhba_queuecommand_lck(struct scsi_cmnd *cmd,
+                                   void (*fn)(struct scsi_cmnd *))
+{
+       struct virtual_hba *vhba;
+       struct Scsi_Host *host;
+       struct scsi_xg_vhba_host *ha;
+       u32 t, l;
+       struct srb *sp;
+       struct os_tgt *tq;
+       struct os_lun *lq;
+       unsigned long flags = 0;
+       int index = -1;
+       u32 queue_num = 0;
+       u32 curr_position = 0;
+       int vhba_state;
+       int lun_map_byte, lun_map_bit;
+       int vv, lindex = -1;
+       struct srb *xg_sp;
+       int found = 0;
+       u32 timeout_val;
+
+       host = cmd->device->host;
+       cmd->scsi_done = fn;
+
+       if (!host) {
+               dprintk(TRC_SCSI_ERRS, NULL,
+                       "host ptr is null in queuecommand\n");
+               return SCSI_MLQUEUE_HOST_BUSY;
+       }
+       vhba = vhba_get_context_by_idr((u32) *(host->hostdata));
+       if (vhba == NULL) {
+               cmd->result = DID_NO_CONNECT << 16;
+               if (cmd->scsi_done)
+                       (cmd->scsi_done) (cmd);
+               else
+                       dprintk(TRC_SCSI_ERRS, NULL, "scsi_done is null\n");
+               return 0;
+       }
+       ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+       vhba_state = atomic_read(&vhba->vhba_state);
+       if ((vhba_state != VHBA_STATE_ACTIVE) &&
+           (vhba_state != VHBA_STATE_SCAN)) {
+               dprintk(TRC_SCSI_ERRS, vhba,
+                       "Error - vhba not active! returning DID_NO_CONNECT\n");
+               cmd->result = DID_NO_CONNECT << 16;
+               (cmd->scsi_done) (cmd);
+               DEC_REF_CNT(vhba);
+               return 0;
+       }
+
+/* Only use this define when you are doing an obj/opt build in VMware */
+#ifdef VMX86_DEVEL
+       if (atomic_read(&vhba->ref_cnt) <= 0)
+               panic("Refcount went negative\n");
+#endif
+
+       if ((atomic_read(&ha->ib_status) == VHBA_IB_DEAD) ||
+           (atomic_read(&ha->ib_status) == VHBA_IB_DOWN)) {
+               cmd->result = DID_NO_CONNECT << 16;
+               if (cmd->scsi_done) {
+                       (cmd->scsi_done) (cmd);
+                       dprintk(TRC_SCSI_ERRS, vhba,
+                               "returning DID_NO_CONNECT as QP is down\n");
+               }
+               DEC_REF_CNT(vhba);
+               return 0;
+       }
+
+       t = cmd->device->id;
+       l = cmd->device->lun;
+       dprintk(TRC_IO, vhba, "recvd tgt %d, lun %d\n", t, l);
+
+       if (l >= ha->max_luns) {
+               ha->stats.scsi_stats.invalid_lun_cnt++;
+               cmd->result = DID_NO_CONNECT << 16;
+               dprintk(TRC_SCSI_ERRS, vhba, "Invalid lun %d max luns %d\n",
+                       l, ha->max_luns);
+               goto release_return;
+       }
+
+       if (t >= ha->max_tgt_id) {
+               ha->stats.scsi_stats.invalid_tgt_cnt++;
+               if (ha->max_tgt_id != 0) {
+                       cmd->result = DID_BAD_TARGET << 16;
+                       dprintk(TRC_INFO, vhba, "Invalid target %d\n ", t);
+                       dprintk(TRC_INFO, vhba, "targt cnt %d", ha->max_tgt_id);
+               } else {
+                       cmd->result = DID_NO_CONNECT << 16;
+               }
+               goto release_return;
+       }
+
+       if (vhba_multiple_q)
+               queue_num = t % VHBA_MAX_VH_Q_COUNT;
+       else
+               queue_num = 0;
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+
+       if (atomic_read(&ha->stats.io_stats.num_vh_q_reqs[queue_num])
+           >= vhba_max_q_depth) {
+               atomic_inc(&ha->stats.io_stats.vh_q_full_cnt[queue_num]);
+               /*
+                * Queue is full. If we have a command with ABORTING
+                * status pending in the outstanding array for this target,
+                * then in all likelihood the iocard/vh is hosed. Take the
+                * recovery action and disconnect the QP.
+                */
+
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               if (vhba_recovery_action(ha, t)) {
+                       cmd->result = DID_NO_CONNECT << 16;
+                       cmd->scsi_done(cmd);
+                       DEC_REF_CNT(vhba);
+                       return 0;
+               }
+               ha->stats.io_stats.qcmd_busy_ret_cnt++;
+               DEC_REF_CNT(vhba);
+               return SCSI_MLQUEUE_HOST_BUSY;
+
+       }
+
+       atomic_inc(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+
+       lun_map_byte = l / 8;
+       lun_map_bit = l % 8;
+       tq = TGT_Q(ha, t);
+       if (tq) {
+               if (tq->init_done == 0) {
+                       dprintk(TRC_IO, vhba,
+                               "setting dma alignment to %ld for tgt %d\n",
+                               PAGE_SIZE, t);
+                       blk_queue_dma_alignment(cmd->device->request_queue,
+                                               (PAGE_SIZE - 1));
+                       tq->init_done = 1;
+               }
+               if (!(vhba->cfg->lunmask_enable))
+                       goto no_lun_mask;
+
+               if (l < MAX_FIBRE_LUNS) {
+                       for (vv = 0; vv < tq->fcport->lun_count; vv++) {
+                               if (l == tq->fcport->lun_ids[vv]) {
+                                       lindex = vv;
+                                       found = 1;
+                                       break;
+                               }
+                       }
+               } else
+                       found = 1;
+
+               if (lindex >= 0)
+                       dprintk(TRC_IO, vhba, "l=%d, lun_ids=%d,",
+                               l, tq->fcport->lun_ids[lindex]);
+               dprintk(TRC_INFO, vhba, "cmd=%02x\n", cmd->cmnd[0]);
+
+               if (found == 0) {
+                       if (l == 0) {
+                               if (cmd->cmnd[0] == INQUIRY) {
+                                       struct scatterlist *sg;
+                                       char *buf;
+
+                                       cmd->result = DID_OK << 16;
+                                       if (scsi_sg_count(cmd)) {
+                                               unsigned int sg_offset;
+
+                                               sg = scsi_sglist(cmd);
+                                               sg_offset = SG_OFFSET(sg);
+
+                                               buf = page_address(sg_page(sg))
+                                                   + sg_offset;
+
+                                               *buf = 0x7f;
+                                               *(buf + 2) = 0x03;
+                                               *(buf + 3) = 0x22;
+                                               *(buf + 4) = 0x00;
+                                       } else if (scsi_bufflen(cmd)) {
+                                               buf = (u8 *) scsi_sglist(cmd);
+                                               *buf = 0x7f;
+                                               *(buf + 2) = 0x03;
+                                               *(buf + 3) = 0x22;
+                                               *(buf + 4) = 0x00;
+                                       }
+                                       dprintk(TRC_IO, vhba, "Mask LUN 0\n");
+                                       spin_unlock_irqrestore(&ha->io_lock,
+                                                              flags);
+                                       goto dec_release_return;
+                               }
+                       } else {
+                               ha->stats.scsi_stats.invalid_lun_cnt++;
+                               cmd->result = DID_NO_CONNECT << 16;
+                               dprintk(TRC_SCSI_ERRS, vhba, "(LUN ID) Error");
+                               dprintk(TRC_SCSI_ERRS, vhba, "lun %d ", l);
+                               dprintk(TRC_SCSI_ERRS, vhba, "not found in ");
+                               dprintk(TRC_SCSI_ERRS, vhba, "target queue!\n");
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               goto dec_release_return;
+                       }
+               }
+no_lun_mask:
+               lq = LUN_Q(ha, t, l);
+               if (!(lq)) {
+                       lq = vhba_allocate_lun(vhba, t, l);
+                       if (lq)
+                               lq->fclun = kzalloc(sizeof(struct fc_lun),
+                                                   GFP_ATOMIC);
+                       if (!lq || !lq->fclun) {
+                               cmd->result = DID_NO_CONNECT << 16;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               goto dec_release_return;
+                       }
+                       lq->fclun->lun = l;
+               }
+
+               dprintk(TRC_IO, vhba, "mapped tgt %d" " lun %d\n", t, l);
+       } else {
+               ha->stats.scsi_stats.invalid_tgt_cnt++;
+               cmd->result = DID_NO_CONNECT << 16;
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto dec_release_return;
+       }
+
+       /* Maximum SCSI I/O retry */
+       if (cmd->allowed < vhba_max_scsi_retry)
+               cmd->allowed = vhba_max_scsi_retry;
+
+       if (atomic_read(&ha->link_state) == LINK_DEAD ||
+           atomic_read(&tq->fcport->state) == FCS_DEVICE_DEAD) {
+               cmd->result = DID_NO_CONNECT << 16;
+
+               dprintk(TRC_TIMER, vhba, "Error - link/tgt dead!\n");
+               dprintk(TRC_TIMER, vhba, "Link state %d device state %d\n",
+                       atomic_read(&ha->link_state),
+                       atomic_read(&tq->fcport->state));
+
+               dprintk(TRC_TIMER, vhba, "sp(%p) cmd:(%p)", CMD_SP(cmd), cmd);
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto dec_release_return;
+       }
+
+       if (vhba->cfg->lunmask_enable) {
+               /* Report lun interception */
+               if ((cmd->cmnd[0] == REPORT_LUNS) &&
+                   (atomic_read(&ha->link_state) == LINK_UP) &&
+                   (atomic_read(&tq->fcport->state) == FCS_ONLINE)) {
+                       /* Just decrement the ha reference counter right away
+                        * as the command is not going to be sent to the
+                        * chip anyway.*/
+                       atomic_dec(&ha->stats.io_stats.
+                                  num_vh_q_reqs[queue_num]);
+                       xg_sp = kzalloc(sizeof(struct srb), GFP_ATOMIC);
+                       if (xg_sp == NULL) {
+                               cmd->result = DID_ERROR << 16;
+                               eprintk(vhba, "Error - allocate SRB failed\n");
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               goto release_return;
+                       }
+                       xg_sp->cmd = cmd;
+                       xg_sp->ha = ha;
+                       CMD_SP(cmd) = (void *)xg_sp;
+                       xg_sp->state = 0;
+                       xg_sp->abort_cnt = 0;
+
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+
+                       if (vhba_report_luns_cmd(xg_sp, t, l)) {
+                               kfree(xg_sp);
+                               goto release_return;
+                       } else {
+
+                               cmd->result = DID_OK << 16;
+
+                               if (xg_sp->cmd) {
+                                       if (xg_sp->cmd->scsi_done)
+                                               (*(xg_sp->cmd)->scsi_done)
+                                                   (xg_sp->cmd);
+                               }
+                               kfree(xg_sp);
+                               /*
+                                * Decrement vhba ref cnt, since the cmd
+                                * is not going down.
+                                */
+                               DEC_REF_CNT(vhba);
+                               goto ret_success;
+                       }
+               }
+       }
+       index = get_outstding_cmd_entry(vhba);
+       if (index == -1) {
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               dprintk(TRC_SCSI_ERRS, vhba,
+                       "Warn - Max limit on outstanding commands reached.\n");
+               dprintk(TRC_SCSI_ERRS, vhba,
+                       "returning SCSI_MLQUEUE_HOST_BUSY");
+               atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+               ha->stats.io_stats.qcmd_busy_ret_cnt++;
+               DEC_REF_CNT(vhba);
+               return SCSI_MLQUEUE_HOST_BUSY;
+
+       }
+
+       ha->outstanding_cmds[ha->current_outstanding_cmd] =
+           kzalloc(sizeof(struct srb), GFP_ATOMIC);
+       if (ha->outstanding_cmds[ha->current_outstanding_cmd] == NULL) {
+               cmd->result = DID_ERROR << 16;
+               eprintk(vhba, "Error - allocate SRB failed\n");
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto dec_release_return;
+       }
+
+       sp = ha->outstanding_cmds[ha->current_outstanding_cmd];
+       sp->cmd = cmd;
+       sp->ha = ha;
+       CMD_SP(cmd) = (void *)sp;
+       sp->state = 0;
+       sp->tgt_queue = tq;
+       sp->lun_queue = lq;
+       sp->error_flag = 0;
+       sp->abort_cnt = 0;
+       sp->unaligned_sg = NULL;
+
+       sp->queue_num = queue_num;
+
+       if (tq->fcport->flags & FCF_TAPE_PRESENT)
+               sp->flags |= SRB_TAPE;
+
+       /*
+        * Check for processor IRQ affinity, or for a small number of
+        * outstanding I/Os to process; otherwise the IRQ can pick up
+        * and submit the I/O.
+        */
+
+       curr_position = ha->current_outstanding_cmd++;
+       if (ha->current_outstanding_cmd == MAX_OUTSTANDING_COMMANDS)
+               ha->current_outstanding_cmd = 0;
+
+       if ((timeout_per_command(cmd) / HZ) <= IB_CMD_TIMEOUT_DELTA)
+               timeout_val = vhba_default_scsi_timeout;
+       else
+               timeout_val = timeout_per_command(cmd) / HZ;
+
+       init_timer(&sp->timer);
+       sp->timer.expires = jiffies + (timeout_val - IB_CMD_TIMEOUT_DELTA) * HZ;
+       sp->timer.data = (unsigned long)sp;
+       sp->timer.function = (void (*)(unsigned long))xsigo_cmd_timeout;
+       add_timer(&sp->timer);
+
+       /* Prepare the IOCB, the handle, build IOCB and fire it off */
+       dprintk(TRC_IO, vhba,
+               "calling start scsi for sp %p t %d l %d\n", sp, t, (u32) l);
+
+       if (vhba_start_scsi(sp, t, l, curr_position)) {
+               dprintk(TRC_INFO, vhba,
+                       "vhba_start_scsi failed sp=%p cmd=%p\n", sp, sp->cmd);
+               if (sp->timer.function != NULL) {
+                       del_timer(&sp->timer);
+                       sp->timer.function = NULL;
+               }
+               if (ha->outstanding_cmds[curr_position]) {
+                       CMD_SP(sp->cmd) = NULL;
+                       kfree(ha->outstanding_cmds[curr_position]);
+                       ha->outstanding_cmds[curr_position] = NULL;
+               } else {
+                       /* Cmd got flushed asynchronously */
+                       dprintk(TRC_INFO, vhba,
+                               "Cmd got flushed asynchronously");
+                       dprintk(TRC_INFO, vhba, " sp=%p cmd=%p\n", sp, sp->cmd);
+                       DEC_REF_CNT(vhba);
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+                       return 0;
+               }
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               cmd->result = DID_BUS_BUSY << 16;
+               goto dec_release_return;
+       }
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+
+ret_success:
+       dprintk(TRC_FUNCS, vhba, "Returning SUCCESS\n");
+       return 0;
+
+dec_release_return:
+       atomic_dec(&ha->stats.io_stats.num_vh_q_reqs[queue_num]);
+
+release_return:
+       dprintk(TRC_SCSI_ERRS, vhba, "returning cmd status %d from qcmd\n",
+               (int)((cmd->result) >> 16));
+       (cmd->scsi_done) (cmd);
+
+       DEC_REF_CNT(vhba);
+       return 0;
+}
+
+/*
+ * The queuecommand prototype changed in 2.6.37, where it became
+ * lock-less. To provide backward compatibility, Linux supplies the
+ * DEF_SCSI_QCMD macro, which calls queuecommand with the host_lock
+ * held, so the behavior is the same as before 2.6.37.
+ * Please see Documentation/scsi/scsi_mid_low_api.txt in the Linux
+ * kernel tree and the following URL for discussion on the lockless
+ * queuecommand:
+ * http://www.spinics.net/lists/linux-scsi/msg48200.html
+ */
+
+#if !defined(DEF_SCSI_QCMD)
+
+#define        xg_vhba_queuecommand    xg_vhba_queuecommand_lck
+
+#else
+
+DEF_SCSI_QCMD(xg_vhba_queuecommand)
+#endif
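
For readers without the SCSI headers at hand, DEF_SCSI_QCMD on kernels of
this vintage expands to roughly the wrapper below (paraphrased from
include/scsi/scsi_host.h; the exact body varies by kernel version):

	/* Approximate expansion of DEF_SCSI_QCMD(xg_vhba_queuecommand). */
	int xg_vhba_queuecommand(struct Scsi_Host *shost,
				 struct scsi_cmnd *cmd)
	{
		unsigned long irq_flags;
		int rc;

		spin_lock_irqsave(shost->host_lock, irq_flags);
		rc = xg_vhba_queuecommand_lck(cmd, cmd->scsi_done);
		spin_unlock_irqrestore(shost->host_lock, irq_flags);
		return rc;
	}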
+struct scsi_host_template xg_vhba_driver_template = {
+       .module = THIS_MODULE,
+       .name = "xsvhba",
+       .proc_name = "xsvhba",
+       .queuecommand = xg_vhba_queuecommand,
+       .eh_abort_handler = xg_vhba_eh_abort,
+       .eh_device_reset_handler = xg_vhba_eh_device_reset,
+       .eh_bus_reset_handler = xg_vhba_eh_bus_reset,
+       .eh_host_reset_handler = xg_vhba_eh_host_reset,
+       .slave_configure = xg_vhba_slave_configure,
+#ifdef CONFIG_SCSI_QLA2xxx_FAILOVER
+       .ioctl = xg_vhba_ioctl,
+#endif
+       .this_id = -1,
+       .cmd_per_lun = 1,
+       .use_clustering = ENABLE_CLUSTERING,
+/* Xsigo limit is 6 */
+       .sg_tablesize = 1,
+/* 512 secs * 512 bytes = VH limit (256 KB) */
+       .max_sectors = VHBA_DEFAULT_TRANSFER_SIZE,
+       .use_blk_tags = 1,
+};
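
The template is registered elsewhere in the driver (outside this hunk);
judging by the hostdata usage in xg_vhba_queuecommand_lck(), registration
would look roughly like this sketch, in which vhba_register_host and the
NULL parent device argument are assumptions:

	/* Hedged sketch: only the hostdata convention
	 * ((u32)*host->hostdata == vhba->idr) is taken from the code
	 * above. */
	static int vhba_register_host(struct virtual_hba *vhba)
	{
		struct Scsi_Host *host;

		host = scsi_host_alloc(&xg_vhba_driver_template,
				       sizeof(u32));
		if (!host)
			return -ENOMEM;

		*(u32 *)host->hostdata = vhba->idr; /* context lookup key */

		if (scsi_add_host(host, NULL)) {
			scsi_host_put(host);
			return -EIO;
		}
		scsi_scan_host(host);
		return 0;
	}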
+
+void sp_put(struct virtual_hba *vhba, struct srb *sp)
+{
+       if ((sp->cmd) && (sp->cmd->scsi_done))
+               (*(sp->cmd)->scsi_done) (sp->cmd);
+       kfree(sp);
+}
+
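+/*
+ * Error-handler abort callback.  In outline (as the code below shows):
+ * look up the srb for the command, verify it is still in the outstanding
+ * array, mark it ABORTING and send an abort IOCB to the IOP.  If the IOP
+ * is unreachable, or aborts keep failing past the recovery count, purge
+ * all pending IOs and ask the work queue to reconnect the queue pair.
+ */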
+static int xg_vhba_eh_abort(struct scsi_cmnd *cmd)
+{
+       struct virtual_hba *vhba;
+       struct srb *sp, *sp1;
+       unsigned int b, t, l;
+       struct scsi_xg_vhba_host *ha = NULL;
+       unsigned long flags = 0;
+       int iocb_handle = 0;
+       int i, ret = FAILED;
+
+       vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+       if (vhba == NULL) {
+               dprintk(TRC_ERRORS, NULL,
+                       "Could not find vhba for this command\n");
+               return FAILED;
+       }
+       ha = vhba->ha;
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+
+       sp = (struct srb *)CMD_SP(cmd);
+
+       if (sp == NULL) {
+               dprintk(TRC_INFO, vhba, "cmd already done cmd=%p\n", cmd);
+               ret = FAILED;
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto out;
+       }
+
+       /* Generate LU queue on bus, target, LUN */
+       b = cmd->device->channel;
+       t = cmd->device->id;
+       l = cmd->device->lun;
+
+       /*
+        * Print the type of command and size of the IO being aborted.
+        */
+       dprintk(TRC_INFO, vhba,
+               "Abort cmd called for sp=%p, cmd=%p, opcode/len = 0x%x/0x%x\n",
+               sp, cmd, cmd->cmnd[0], scsi_bufflen(cmd));
+
+       atomic_inc(&vhba->abort_count);
+
+       for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+               sp1 = ha->outstanding_cmds[i];
+               if (sp1 == NULL)
+                       continue;
+               if (sp1->cmd == cmd) {
+                       /*
+                        * We found the command. sp1 must be the same as
+                        * sp; if not, we have a duplicate command in the
+                        * list and we should fail this abort.
+                        */
+                       if (sp1 != sp) {
+                               dprintk(TRC_INFO, vhba,
+                                       "Duplicate cmd in outstanding array: sp=%p, cmd=%p, sp1=%p\n",
+                                       sp, cmd, sp1);
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               ret = FAILED;
+                               goto out;
+                       }
+                       break;
+               }
+       }
+       /*
+        * If IOP did not respond to the first abort and it
+        * failed through this routine, it is possible that the IOP
+        * never got a chance to look at the abort and the command
+        * about to be aborted crossed paths with the abort failure.
+        * In that case, mark the second attempt to abort this command
+        * as success.
+        */
+       if ((sp->state == VHBA_IO_STATE_ABORTED) ||
+           (sp->state == VHBA_IO_STATE_ABORT_NEEDED)) {
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto success;
+       }
+
+       if (i == MAX_OUTSTANDING_COMMANDS) {
+               if (atomic_read(&ha->ib_status) == VHBA_IB_DEAD) {
+                       spin_unlock_irqrestore(&ha->io_lock, flags);
+                       ret = FAILED;
+                       goto out;
+               }
+               dprintk(TRC_INFO, vhba,
+                       "Failing Abort(): can't find sp:0x%p, ", sp);
+               dprintk(TRC_INFO, vhba, "cmd:0x%p sp->cmd:0x%p", cmd, sp->cmd);
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               ret = FAILED;
+               goto out;
+       }
+
+       sp->state = VHBA_IO_STATE_ABORTING;
+       iocb_handle = sp->iocb_handle;
+
+       /*
+        * It may take up to 30 seconds for a target to transition from
+        * LOST to ONLINE/DEAD state. Aborts will continue to fail during
+        * that time. Allow that much time before starting recovery.
+        */
+
+       if (((sp->abort_cnt)++ > vhba_abort_recovery_count) &&
+           (atomic_read(&ha->ib_status) == VHBA_IB_UP)) {
+               /*
+                * We are stuck in ABORT loop due to IOP/agent being stuck
+                * Purge all pending IOs and disconnect/reconnect QP
+                */
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               dprintk(TRC_INFO, vhba,
+                       "Abort failed %d times, initiating recovery action\n",
+                       vhba_abort_recovery_count);
+               atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+               vhba_purge_pending_ios(vhba);
+               /*
+                * Let the Work Queue thread disconnect the Q pair.
+                */
+               atomic_set((&ha->qp_status), VHBA_QP_TRYCONNECTING);
+               ret = FAILED;
+               goto out;
+
+       }
+
+       ret = vhba_send_abort(vhba, iocb_handle, t);
+       if (ret) {
+               /*
+                * If  QP is disconnected, complete the abort
+                */
+               if (ret == VHBA_QP_DISCONNECTED) {
+                       if (ha->outstanding_cmds[iocb_handle]) {
+                               ha->outstanding_cmds[iocb_handle] = NULL;
+                               atomic_dec(&ha->stats.
+                                          io_stats.num_vh_q_reqs[sp->
+                                                                 queue_num]);
+                               goto success;
+                       } else {
+                               dprintk(TRC_INFO, vhba,
+                                       "cmd completed while we were in abort() cmd = %p sp->cmd = %p\n",
+                                       cmd, sp->cmd);
+                               ret = FAILED;
+                               spin_unlock_irqrestore(&ha->io_lock, flags);
+                               goto out;
+                       }
+               }
+               ha->stats.scsi_stats.abort_fail_cnt++;
+               dprintk(TRC_INFO, vhba, "Error - send abort failed %d\n", ret);
+               ret = FAILED;
+               sp->state = VHBA_IO_STATE_ACTIVE;
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto out;
+       }
+       if (sp->state == VHBA_IO_STATE_ABORTING) {
+               ret = FAILED;
+               if (sp->timer.function != NULL) {
+                       del_timer(&sp->timer);
+                       sp->timer.function = NULL;
+                       sp->timer.data = (unsigned long)NULL;
+               }
+               sp->state = VHBA_IO_STATE_ABORT_FAILED;
+               spin_unlock_irqrestore(&ha->io_lock, flags);
+               goto out;
+       }
+
+success:
+       ha->stats.scsi_stats.abort_success_cnt++;
+       if (sp->timer.function != NULL) {
+               del_timer(&sp->timer);
+               sp->timer.function = NULL;
+               sp->timer.data = (unsigned long)NULL;
+       }
+       sp->cmd->result = DID_ABORT << 16;
+       /*
+        * Reacquire the iocb handle and clear the
+        * outstanding array entry.
+        */
+
+       iocb_handle = sp->iocb_handle;
+       if (ha->outstanding_cmds[iocb_handle])
+               ha->outstanding_cmds[iocb_handle] = NULL;
+       CMD_SP(sp->cmd) = NULL;
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+
+       complete_cmd_and_callback(vhba, sp, sp->cmd);
+
+       /*
+        * Decrement Ref count for the original command
+        */
+       DEC_REF_CNT(vhba);
+       ret = SUCCESS;
+       dprintk(TRC_INFO, vhba,
+               "Abort Success for sp=%p, cmd=%p, sp->cmd=%p\n",
+               sp, cmd, sp->cmd);
+out:
+       DEC_REF_CNT(vhba);
+       return ret;
+}
+
+static int xg_vhba_eh_device_reset(struct scsi_cmnd *cmd)
+{
+       struct virtual_hba *vhba;
+       unsigned int b, t, l;
+       struct scsi_xg_vhba_host *ha = NULL;
+       int ret = FAILED;
+
+       vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+       if (vhba == NULL) {
+               dprintk(TRC_ERR_RECOV, NULL,
+                       "Could not find vhba for this command\n");
+               return FAILED;
+       }
+
+       ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+       /* Generate LU queue on bus, target, LUN */
+       b = cmd->device->channel;
+       t = cmd->device->id;
+       l = cmd->device->lun;
+
+       dprintk(TRC_INFO, vhba,
+               "Device Reset called for cmd=%p tgt=%d, lun=%d\n", cmd, t, l);
+
+       dprintk(TRC_INFO, vhba, "TGT reset:tgt=%d\n", t);
+       ret = vhba_send_tgt_reset(vhba, t);
+       if (ret && ret != VHBA_QP_DISCONNECTED) {
+               ha->stats.scsi_stats.dev_reset_fail_cnt++;
+               dprintk(TRC_INFO, vhba, "Error - send failed\n");
+               ret = FAILED;
+               goto out;
+       } else
+               vhba_taskmgmt_flush_ios(vhba, cmd->device->id, -1, 0);
+
+       ret = SUCCESS;
+       ha->stats.scsi_stats.dev_reset_success_cnt++;
+       dprintk(TRC_INFO, vhba, "Device Reset Successful!\n");
+out:
+       DEC_REF_CNT(vhba);
+       return ret;
+}
+
+static int xg_vhba_eh_bus_reset(struct scsi_cmnd *cmd)
+{
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+       if (vhba == NULL) {
+               eprintk(NULL, "Could not find vhba for this command\n");
+               return FAILED;
+       }
+
+       dprintk(TRC_INFO, vhba, "Bus reset called\n");
+
+       ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+       vhba_ib_disconnect_qp(vhba);
+       vhba_purge_pending_ios(vhba);
+
+       atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+
+       ha->stats.scsi_stats.bus_reset_success_cnt++;
+       dprintk(TRC_INFO, vhba, "Bus Reset Successful\n");
+
+       DEC_REF_CNT(vhba);
+       return SUCCESS;
+}
+
+static int xg_vhba_eh_host_reset(struct scsi_cmnd *cmd)
+{
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+
+       vhba = vhba_get_context_by_idr((u32) *(cmd->device->host->hostdata));
+
+       if (vhba == NULL) {
+               eprintk(NULL, "Could not find vhba for this command\n");
+               return FAILED;
+       }
+
+       dprintk(TRC_INFO, vhba, "Host Reset Called\n");
+
+       ha = (struct scsi_xg_vhba_host *)vhba->ha;
+
+       vhba_ib_disconnect_qp(vhba);
+       vhba_purge_pending_ios(vhba);
+
+       atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+
+       ha->stats.scsi_stats.bus_reset_success_cnt++;
+       dprintk(TRC_INFO, vhba, "Host Reset Successful\n");
+
+       DEC_REF_CNT(vhba);
+       return SUCCESS;
+}
+
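+/*
+ * Copy 'len' bytes of generated text into the caller's proc buffer,
+ * honouring the requested window [offset, offset + length).  For
+ * example, with offset=10 and length=20, the first 10 bytes produced
+ * are skipped and at most 20 bytes are copied.
+ */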
+void copy_mem_info(struct info_str *info, char *data, int len)
+{
+       dprintk(TRC_FUNCS, NULL, "Entering\n");
+
+       if (info->pos + len > info->offset + info->length)
+               len = info->offset + info->length - info->pos;
+
+       if (info->pos + len < info->offset) {
+               info->pos += len;
+               return;
+       }
+
+       if (info->pos < info->offset) {
+               off_t partial;
+
+               partial = info->offset - info->pos;
+               data += partial;
+               info->pos += partial;
+               len -= partial;
+       }
+
+       if (len > 0) {
+               memcpy(info->buffer, data, len);
+               info->pos += len;
+               info->buffer += len;
+       }
+       dprintk(TRC_FUNCS, NULL, "Returning\n");
+}
+
+static int copy_info(struct info_str *info, char *fmt, ...)
+{
+       va_list args;
+       char buf[256];
+       int len;
+
+       va_start(args, fmt);
+       /* bound the write; vscnprintf returns the bytes actually stored */
+       len = vscnprintf(buf, sizeof(buf), fmt, args);
+       va_end(args);
+
+       copy_mem_info(info, buf, len);
+       return len;
+}
+
+int xg_vhba_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
+                     off_t offset, int length, int inout)
+{
+       struct virtual_hba *vhba = NULL;
+       struct info_str info;
+       struct scsi_xg_vhba_host *ha;
+       int retval;
+
+       vhba = vhba_get_context_by_idr((u32) *(shost->hostdata));
+       if (vhba == NULL)
+               return 0;
+       ha = vhba->ha;
+
+       if (inout) {
+               DEC_REF_CNT(vhba);
+               return length;
+       }
+
+       if (start)
+               *start = buffer;
+
+       info.buffer = buffer;
+       info.length = length;
+       info.offset = offset;
+       info.pos = 0;
+
+       /* start building the print buffer */
+       copy_info(&info, "Xsigo Virtual Host Adapter\n");
+       copy_info(&info, "Driver version %s\n", XG_VHBA_VERSION);
+
+       retval = info.pos > info.offset ? info.pos - info.offset : 0;
+
+       dprintk(TRC_PROC, vhba,
+               "Exiting proc_info: info.pos=%d, offset=0x%lx, length=0x%x\n",
+               info.pos, offset, length);
+       DEC_REF_CNT(vhba);
+       return retval;
+}
+
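+/*
+ * Detect a command queue wedged by aborts: if every outstanding command
+ * for the target sits in ABORTING state and the queue is full, declare
+ * the IB side dead, purge pending IOs and let the work queue reconnect
+ * the queue pair.
+ */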
+int vhba_recovery_action(struct scsi_xg_vhba_host *ha, u32 t)
+{
+       struct os_tgt *tq;
+       struct srb *sp;
+       struct virtual_hba *vhba = ha->vhba;
+       unsigned long flags = 0;
+       int i, count = 0;
+       int rval = 0;
+
+       tq = TGT_Q(ha, t);
+
+       spin_lock_irqsave(&ha->io_lock, flags);
+       for (i = 0; i < MAX_OUTSTANDING_COMMANDS; i++) {
+               if (ha->outstanding_cmds[i]) {
+                       sp = ha->outstanding_cmds[i];
+                       if ((sp->tgt_queue == tq) &&
+                           (sp->state == VHBA_IO_STATE_ABORTING)) {
+                               count++;
+                       }
+               }
+
+       }
+       spin_unlock_irqrestore(&ha->io_lock, flags);
+       if (count == VHBA_MAX_VH_Q_DEPTH) {
+               /*
+                * We found all the commands stuck in ABORTING state and
+                * the queue is full. Flush the defer list and purge all
+                * pending IOs.
+                */
+               dprintk(TRC_INFO, vhba,
+                       "Command queue is stuck with aborts.");
+               dprintk(TRC_INFO, vhba, " Take recovery actions.\n");
+
+               atomic_set(&ha->ib_status, VHBA_IB_DEAD);
+
+               vhba_purge_pending_ios(vhba);
+
+               /*
+                * Let the Work Queue thread disconnect the Q pair.
+                */
+
+               atomic_set((&ha->qp_status), VHBA_QP_TRYCONNECTING);
+
+               rval = 1;
+       }
+
+       return rval;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_scsi_intf.h
new file mode 100644 (file)
index 0000000..3f59cc0
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_SCSI_INTF_H__
+#define __VHBA_SCSI_INTF_H__
+
+extern int vhba_max_q_depth;
+extern int vhba_max_scsi_retry;
+extern int vhba_default_scsi_timeout;
+
+int vhba_purge_pending_ios(struct virtual_hba *vhba);
+void vhba_taskmgmt_flush_ios(struct virtual_hba *vhba, int tgt_id, int lun,
+                            int lun_reset_flag);
+int vhba_send_abort(struct virtual_hba *vhba, int abort_handle, int t);
+int vhba_send_lun_reset(struct virtual_hba *vhba, int t, int l);
+int vhba_send_tgt_reset(struct virtual_hba *vhba, int t);
+void complete_cmd_and_callback(struct virtual_hba *vhba, struct srb *sp,
+                              struct scsi_cmnd *cp);
+int vhba_start_scsi(struct srb *sp, u32 t, u32 l, u32 handle);
+int vhba_report_luns_cmd(struct srb *sp, u32 t, u32 l);
+int vhba_ib_disconnect_qp(struct virtual_hba *vhba);
+struct os_lun *vhba_allocate_lun(struct virtual_hba *vhba, u32 tgt, u32 lun);
+int get_outstding_cmd_entry(struct virtual_hba *vhba);
+
+void vhba_set_tgt_count(struct virtual_hba *vhba);
+void vhba_mark_tgts_lost(struct virtual_hba *vhba);
+int vhba_set_all_tgts_offline(struct virtual_hba *vhba);
+
+int xg_vhba_start_scsi(void);
+void xg_vhba_free_device(struct virtual_hba *);
+extern int vhba_scsi_release(struct virtual_hba *vhba);
+void dump_iocb(struct cmd_type_7 *cmd_pkt);
+extern int xg_vhba_proc_info(struct Scsi_Host *shost,
+               char *buffer, char **start, off_t offset,
+               int length, int inout);
+extern int vhba_recovery_action(struct scsi_xg_vhba_host *, u32);
+extern void vhba_workqueue_processor(struct work_struct *work);
+extern int vhba_check_heart_beat(struct virtual_hba *vhba);
+
+#endif
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_stats.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_stats.c
new file mode 100644 (file)
index 0000000..b3d7c6f
--- /dev/null
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+/* #include <linux/smp_lock.h> */
+#include <linux/delay.h>
+
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+
+struct timer_list stats_timer;
+u32 stats_timer_on;
+
+void vhba_stats_clear_all(struct vhba_ha_stats *pstats)
+{
+       if (pstats == NULL) {
+               dprintk(TRC_ERRORS, NULL, "NULL stats pointer passed\n");
+               return;
+       }
+       memset(pstats, 0, sizeof(struct vhba_ha_stats));
+}
+
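+/*
+ * Handle an XSMP stats request.  action 1 clears the per-vhba counters;
+ * any other action fills the reply with a snapshot of the IO, FMR, SCSI,
+ * FC and IB counters before acking it back to the XCM.
+ */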
+void vhba_xsmp_stats_req(struct work_struct *work)
+{
+       struct xsvhba_work *xwork =
+           container_of(work, struct xsvhba_work, work);
+       struct _vhba_stats *msg = (struct _vhba_stats *)xwork->msg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha = NULL;
+       struct vhba_ha_stats *pstats = NULL;
+
+       vhba = vhba_get_context_by_resource_id(msg->vid);
+
+       if (vhba == NULL)
+               goto out;
+
+       ha = vhba->ha;
+       pstats = &ha->stats;
+
+       if (msg->action == 1) {
+               dprintk(TRC_STATS, NULL, "received clear stats\n");
+               vhba_stats_clear_all(pstats);
+               DEC_REF_CNT(vhba);
+               goto out;
+       } else {
+               dprintk(TRC_STATS, NULL,
+                       "received get stats action %d\n", msg->action);
+               msg->totalio = ha->stats.io_stats.total_read_reqs +
+                   ha->stats.io_stats.total_write_reqs +
+                   ha->stats.io_stats.total_task_mgmt_reqs;
+               msg->readbytecount = ha->stats.io_stats.total_read_mbytes;
+               msg->writebytecount = ha->stats.io_stats.total_write_mbytes;
+               msg->outstandingrequestcount = 0;
+               msg->iorequestcount = msg->totalio;
+               msg->readrequestcount = ha->stats.io_stats.total_read_reqs;
+               msg->writerequestcount = ha->stats.io_stats.total_write_reqs;
+               msg->taskmanagementrequestcount =
+                   ha->stats.io_stats.total_task_mgmt_reqs;
+               msg->targetcount = ha->target_count;
+               msg->luncount = ha->lun_count;
+
+               /* this is cumulative and not per vhba */
+               msg->xsmpxtdowncount = vhba_xsmp_stats.xt_state_dn_cnt;
+
+               /* this is also cumulative */
+               msg->xsmpxtoperstaterequestcount =
+                   vhba_xsmp_stats.oper_req_msg_cnt;
+               msg->mapfmrcount = ha->stats.fmr_stats.map_cnt;
+               msg->ummapfmrcount = ha->stats.fmr_stats.unmap_cnt;
+               msg->usedmapfmrcount = msg->mapfmrcount - msg->ummapfmrcount;
+               msg->abortcommandcount =
+                   ha->stats.scsi_stats.abort_success_cnt +
+                   ha->stats.scsi_stats.abort_fail_cnt;
+               msg->resetluncommandcount = 0;
+               msg->resettargetcommandcount =
+                   ha->stats.scsi_stats.dev_reset_success_cnt +
+                   ha->stats.scsi_stats.dev_reset_fail_cnt;
+               msg->resetbuscommandcount =
+                   ha->stats.scsi_stats.bus_reset_success_cnt +
+                   ha->stats.scsi_stats.bus_reset_fail_cnt;
+               msg->linkdowncount = ha->stats.fc_stats.link_dn_cnt;
+               msg->discinfoupdatecount = ha->stats.fc_stats.disc_info_cnt;
+               msg->targetlostcount = ha->stats.fc_stats.rscn_dn_cnt +
+                   ha->stats.fc_stats.rscn_multiple_dn_cnt;
+               msg->targetfoundcount = ha->stats.fc_stats.rscn_up_cnt +
+                   ha->stats.fc_stats.rscn_multiple_up_cnt;
+               msg->cqpdisconnectcount = ha->stats.ib_stats.cqp_dn_cnt;
+               msg->dqpdisconnectcount = ha->stats.ib_stats.dqp_dn_cnt;
+               msg->cqpibsenterrorcount = ha->stats.ib_stats.cqp_send_err_cnt;
+               msg->dqpibsenterrorcount = ha->stats.ib_stats.dqp_send_err_cnt;
+               msg->cqpibreceiveerrorcount =
+                   ha->stats.ib_stats.cqp_recv_err_cnt;
+               msg->dqpibreceiverrrorcount =
+                   ha->stats.ib_stats.dqp_recv_err_cnt;
+               msg->cqpibremotedisconnecterrorcount = 0;
+               msg->dqpibremotedisconnecterrorcount = 0;
+       }
+       msg->code = 0;
+       /* ack while we still hold a reference; vhba may go away after DEC */
+       vhba_xsmp_ack(vhba->xsmp_hndl, (u8 *) msg, sizeof(struct _vhba_stats));
+       DEC_REF_CNT(vhba);
+out:
+       kfree(xwork->msg);
+       kfree(xwork);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_wq.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_wq.c
new file mode 100644 (file)
index 0000000..039268a
--- /dev/null
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/fs.h>
+#include "vhba_defs.h"
+#include "vhba_ib.h"
+#include "vhba_xsmp.h"
+#include "vhba_scsi_intf.h"
+
+#define VHBA_WORKQUEUE         "xsvhba_wq"
+#define VHBA_MAX_DEL_TRY       3
+#define VHBA_MAX_TEAR_DOWN_TRY 3
+
+struct delayed_work vhba_main_work;
+struct workqueue_struct *vhba_workqueuep;
+struct reconn_sts {
+       int idr;
+       int cqp_hdl;
+       int dqp_hdl;
+};
+struct reconn_sts reconn_st[MAX_VHBAS];
+
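+/*
+ * Periodic housekeeping pass.  Under the global read lock, collect the
+ * idr of every vhba whose QP is stuck reconnecting or is marked for
+ * reconnect; then, outside the lock, tear down and re-establish the QP
+ * for each collected entry.
+ */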
+void vhba_internal_processing(void)
+{
+       int i = 0;
+       int reconn_count = 0;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       unsigned long flags;
+
+       read_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               int got_handle = 0;
+
+               ha = vhba->ha;
+
+               /* Check IB is dead or not */
+               ib_link_dead_poll(ha);
+
+               if (atomic_read(&ha->qp_status) == VHBA_QP_RECONNECTING)
+                       vhba->qp_poll_count++;
+               else
+                       vhba->qp_poll_count = 0;
+
+               /*
+                * If we are stuck in VHBA_QP_RECONNECTING for 60+ seconds,
+                * let us try to force reconnect
+                */
+               if (vhba->qp_poll_count >= 12) {
+                       reconn_st[i].idr = vhba->idr;
+                       /* advance the index so this entry is not clobbered */
+                       i++;
+                       goto reconnect;
+               }
+
+               /*
+                * Check if IOP lost the QP context. Send a heartbeat
+                * to revive it.
+                */
+
+               if (atomic_read(&vhba->ha->ib_status) == VHBA_IB_UP) {
+                       if (vhba_check_heart_beat(vhba))
+                               vhba->heartbeat_count++;
+                       else
+                               vhba->heartbeat_count = 0;
+               }
+
+               if (vhba->heartbeat_count >= 12) {
+                       dprintk(TRC_WQ, vhba,
+                               "Sending heartbeat for QP context recovery\n");
+                       (void)vhba_send_heart_beat(vhba);
+                       vhba->heartbeat_count = 0;
+               }
+
+               if (atomic_read(&ha->qp_status) == VHBA_QP_TRYCONNECTING) {
+                       if (vhba->reconn_try_cnt < VHBA_MAX_TEAR_DOWN_TRY) {
+                               vhba->reconn_try_cnt++;
+                               continue;
+                       }
+                       vhba->reconn_attempt++;
+                       dprintk(TRC_WQ, vhba,
+                               "QP Marked for reconnect: idr=%d\n", vhba->idr);
+                       reconn_st[i].idr = vhba->idr;
+                       got_handle = 1;
+                       i++;
+               }
+
+               if (!got_handle)
+                       continue;
+
+reconnect:
+               vhba->reconn_try_cnt = 0;
+               reconn_count++;
+       }
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+
+       for (i = 0; i < reconn_count; i++) {
+               vhba = vhba_get_context_by_idr(reconn_st[i].idr);
+               if (vhba == NULL) {
+                       dprintk(TRC_WQ, NULL, "No matching vhba for idr=%d\n",
+                               reconn_st[i].idr);
+                       continue;
+               }
+               ha = vhba->ha;
+               vhba_xsmp_notify(vhba->xsmp_hndl,
+                                vhba->resource_id, XSMP_VHBA_OPER_DOWN);
+               vhba_ib_disconnect_qp(vhba);
+
+               vhba_purge_pending_ios(vhba);
+
+               dprintk(TRC_INFO, vhba, "Trying to reconnect QP\n");
+               vhba_ib_connect_qp(vhba);
+               DEC_REF_CNT(vhba);
+       }
+}
+
+int vhbawq_init(void)
+{
+       vhba_workqueuep = create_singlethread_workqueue(VHBA_WORKQUEUE);
+       if (vhba_workqueuep == NULL)
+               return -1;
+
+       return 0;
+}
+
+int vhbawq_cleanup(void)
+{
+       cancel_delayed_work(&vhba_main_work);
+       flush_workqueue(vhba_workqueuep);
+       destroy_workqueue(vhba_workqueuep);
+       return 0;
+}
+
+int vhbawq_queue(void)
+{
+       INIT_DELAYED_WORK(&vhba_main_work, vhba_workqueue_processor);
+       queue_delayed_work(vhba_workqueuep, &vhba_main_work,
+                          WQ_PERIODIC_TIMER * HZ);
+       return 0;
+}
+
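+/*
+ * The handler re-queues itself, turning the delayed work into a
+ * periodic poll that fires every WQ_PERIODIC_TIMER seconds.
+ */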
+void vhba_workqueue_processor(struct work_struct *work)
+{
+       vhba_internal_processing();
+       vhbawq_queue();
+}
+
+int vhba_check_heart_beat(struct virtual_hba *vhba)
+{
+       int tgt;
+       int tgt_dead = 0;
+       int ret = 0;
+       struct os_tgt *tq;
+       struct scsi_xg_vhba_host *ha = vhba->ha;
+
+       for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+               tq = TGT_Q(ha, tgt);
+               if (!tq)
+                       continue;
+               if (atomic_read(&tq->fcport->state) == FCS_DEVICE_DEAD) {
+                       tgt_dead = 1;
+                       break;
+               }
+       }
+
+       if ((tgt_dead == 1) ||
+           (vhba->ha->target_count == 0) ||
+           (atomic_read(&ha->link_state) == LINK_DEAD)) {
+               ret = 1;
+       }
+
+       return ret;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.c b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.c
new file mode 100644 (file)
index 0000000..836eacb
--- /dev/null
@@ -0,0 +1,984 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/delay.h>
+#ifdef CONFIG_SUSE_KERNEL
+#include <linux/hardirq.h>
+#endif
+#include "vhba_os_def.h"
+#include "vhba_xsmp.h"
+#include "vhba_ib.h"
+#include "xsmp_session.h"
+#include "vhba_defs.h"
+
+static int vhba_swap_bytes(int direction, u8 *msg);
+
+int vhba_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+       if (vhba_swap_bytes(H_TO_N, data)) {
+               eprintk(NULL,
+                       "Error - byte order conversion gone wrong! Returning -1\n");
+               return -1;
+       }
+
+       return xcpm_send_message(xsmp_hndl, vhba_xsmp_service_id, data, length);
+}
+
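+/*
+ * Acknowledge a management message.  The reply is the original payload
+ * prepended with a fresh xsmp_message_header, i.e.
+ * [xsmp_message_header][payload], sent back over the same XSMP session.
+ */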
+int vhba_xsmp_ack(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+       int new_length = length + sizeof(struct xsmp_message_header);
+       struct xsmp_message_header *m_header;
+       u8 *msg_offset;
+       int ret;
+       u8 *msg = kmalloc(new_length, GFP_ATOMIC);
+
+       if (!msg) {
+               eprintk(NULL,
+                       "Error - alloc for vhba xsmp_send_ack failed. Returning 1\n");
+               return 1;
+       }
+       m_header = (struct xsmp_message_header *)msg;
+       msg_offset = msg + sizeof(struct xsmp_message_header);
+
+       memset(msg, 0, sizeof(struct xsmp_message_header));
+
+       m_header->type = XSMP_MESSAGE_TYPE_VHBA;
+       m_header->length = new_length;
+       m_header->seq_number = 0;
+
+       memcpy(msg_offset, data, length);
+       /* msg freed by callee */
+       ret = vhba_xsmp_send_msg(xsmp_hndl, msg, new_length);
+
+       return ret;
+}
+
+int vhba_xsmp_nack(xsmp_cookie_t xsmp_hndl,
+                  u8 *data, int length, enum vhba_xsmp_error_codes nack_code)
+{
+       int new_length = length + sizeof(struct xsmp_message_header);
+       struct xsmp_message_header *m_header;
+       u8 *msg_offset;
+       int ret = 0;
+       u8 *msg = kmalloc(new_length, GFP_ATOMIC);
+
+       if (!((nack_code > VHBA_NACK_INVALID)
+             && (nack_code < VHBA_NACK_CODE_MAX))) {
+               eprintk(NULL, "Error - invalid nack code %d\n", nack_code);
+       }
+       if (!msg) {
+               eprintk(NULL,
+                       "Error - alloc for vhba xsmp_send_nack failed. Returning 1\n");
+               return 1;
+       }
+       m_header = (struct xsmp_message_header *)msg;
+       msg_offset = msg + sizeof(struct xsmp_message_header);
+       memset(msg, 0, sizeof(struct xsmp_message_header));
+
+       m_header->type = XSMP_MESSAGE_TYPE_VHBA;
+       m_header->length = new_length;
+       m_header->seq_number = 0;
+
+       memcpy(msg_offset, data, length);
+
+       ((struct vhba_xsmp_msg *)msg_offset)->code = nack_code | XSMP_VHBA_NACK;
+       /* msg freed by callee */
+       ret = vhba_xsmp_send_msg(xsmp_hndl, msg, new_length);
+       return ret;
+}
+
+int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id, int notifycmd)
+{
+       int length = sizeof(struct xsmp_message_header) +
+           sizeof(struct vhba_xsmp_msg);
+       int prio = (in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
+       int ret;
+       struct xsmp_message_header *header;
+       struct vhba_xsmp_msg *xsmp_msg;
+       u8 *msg = kmalloc(length, prio);
+
+       if (!msg) {
+               eprintk(NULL,
+                       "Error - alloc for vhba xsmp_notify failed. Returning 1\n");
+               return 1;
+       }
+       header = (struct xsmp_message_header *)msg;
+       xsmp_msg = (struct vhba_xsmp_msg *)(msg + sizeof(*header));
+
+       memset(msg, 0, length);
+       header->type = XSMP_MESSAGE_TYPE_VHBA;
+       header->length = length;
+
+       xsmp_msg->type = notifycmd;
+       xsmp_msg->length = sizeof(struct vhba_xsmp_msg);
+       xsmp_msg->resource_id = resource_id;
+
+       ret = vhba_xsmp_send_msg(xsmp_hndl, msg, length);
+       if (ret) {
+               eprintk(NULL, "Error sending xsmp message %d\n", ret);
+               kfree(msg);
+       }
+       return ret;
+}
+
+static void vhba_sync_begin(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+
+       xsmp_cookie_t xsmp_hndl = xwork->xsmp_hndl;
+       struct virtual_hba *vhba;
+       unsigned long flags = 0;
+
+       read_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+                       dprintk(TRC_INFO,
+                               vhba, "sync begin: xsmp_hndl=%p\n", xsmp_hndl);
+                       vhba->xsmp_hndl = xsmp_hndl;
+#if 0
+                       /*
+                        * Because of bug on chassis sometimes VHBA's
+                        * get deleted
+                        */
+                       vhba->sync_needed = 1;
+#endif
+               }
+       }
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+static void vhba_sync_end(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+
+       xsmp_cookie_t xsmp_hndl = xwork->xsmp_hndl;
+       struct virtual_hba *vhba = NULL;
+       struct virtual_hba *tmp_vhba;
+       unsigned long flags = 0;
+
+       /* Delete all non-sync'ed VHBAs */
+       read_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry_safe(vhba, tmp_vhba, &vhba_g.list, list) {
+               if (xsmp_sessions_match(&vhba->xsmp_info, xsmp_hndl)) {
+                       if (vhba->sync_needed) {
+                               read_unlock_irqrestore(&vhba_global_lock,
+                                                      flags);
+                               dprintk(TRC_INFO, vhba,
+                                       "Deleting vhba on xsmp_hndl=%p\n",
+                                       xsmp_hndl);
+                               vhba_delete(vhba->resource_id);
+                               read_lock_irqsave(&vhba_global_lock, flags);
+                       }
+               }
+       }
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+       dprintk(TRC_INFO, NULL, "xsmp_hndl=%p\n", xsmp_hndl);
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+static void vhba_xsmp_handle_oper_req(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+
+       struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha;
+       int qp_state = 0;
+
+       vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+       if (vhba == NULL)
+               goto out;
+       ha = vhba->ha;
+
+       qp_state = atomic_read(&ha->qp_status);
+       if (qp_state == VHBA_QP_CONNECTED) {
+               dprintk(TRC_XSMP, NULL,
+                       "SYNC: sending oper state up for vhba %p due to oper req. QP state = %d\n",
+                       vhba, qp_state);
+               vhba_xsmp_notify(xwork->xsmp_hndl, msg->resource_id,
+                                XSMP_VHBA_OPER_UP);
+       } else {
+               dprintk(TRC_XSMP, NULL,
+                       "SYNC: sending oper state down for vhba %p due to oper req\n",
+                       vhba);
+               vhba_xsmp_notify(xwork->xsmp_hndl, msg->resource_id,
+                                XSMP_VHBA_OPER_DOWN);
+       }
+       DEC_REF_CNT(vhba);
+out:
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+void vhba_xsmp_create(struct work_struct *work)
+{
+       struct xsvhba_work *xwork = container_of(work, struct xsvhba_work,
+                                                work);
+
+       struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+
+       dprintk(TRC_XSMP, NULL, "Vhba: Type= %d Code= %d Len= %d BMask= %d\n",
+               msg->type, msg->code, msg->length, msg->bit_mask);
+
+       dprintk(TRC_XSMP, NULL, "Vhba: TCA_Lid= %d TS= %d Res_Id= %Lx\n",
+               ntohs(msg->tca_lid), msg->tapesupport, msg->resource_id);
+
+       dprintk(TRC_XSMP, NULL, "Vhba: BW= %d AS= %d QD= %d ET= %d\n",
+               msg->bandwidth, msg->adminstate, msg->scsiqueuedepth,
+               msg->executionthrottle);
+
+       dprintk(TRC_INFO, NULL, "INSTALL received for vhba:vid %s:0x%Lx\n",
+               msg->vh_name, msg->resource_id);
+       vhba_create(xwork->xsmp_hndl, msg);
+
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
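+/*
+ * Apply a MODIFY from the XCM.  bit_mask selects the change: XT state
+ * down, new TCA LID/GUID (which forces a QP reconnect), or a new
+ * link-down timeout.  An update carrying the same LID and GUID we
+ * already hold is ignored.
+ */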
+int vhba_update(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg)
+{
+       struct scsi_xg_vhba_host *ha = NULL;
+       struct virtual_hba *vhba;
+       int ret = 0;
+
+       vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+       if (vhba == NULL) {
+               dprintk(TRC_XSMP_ERRS, NULL, "vhba not found\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ha = vhba->ha;
+
+       dprintk(TRC_XSMP, vhba,
+               "xg lid %x guid %llx msg lid %x ",
+               ntohs(vhba->cfg->tca_lid), be64_to_cpu(vhba->cfg->tca_guid),
+               ntohs(msg->tca_lid));
+       dprintk(TRC_XSMP, vhba, "guid %llx %x %llx\n",
+               be64_to_cpu(msg->tca_guid), msg->tca_lid,
+               msg->tca_guid);
+
+       if (msg->bit_mask & VHBA_XT_INFO_CHANGE) {
+               dprintk(TRC_XSMP, vhba, "bit mask is 0x%x\n", msg->bit_mask);
+               dprintk(TRC_XSMP,
+                       vhba, "xg lid %x guid %llx msg lid %x guid %llx\n",
+                       ntohs(vhba->cfg->tca_lid),
+                       be64_to_cpu(vhba->cfg->tca_guid),
+                       ntohs(msg->tca_lid), be64_to_cpu(msg->tca_guid));
+
+               /*
+                * Make this change to handle the case when
+                * the XCM sends a vhba_update message
+                * with the same TCA GUID and LID.
+                *
+                * We now ignore the message when the TCA GUID and
+                * LID are the same as ones we have stored.
+                */
+               if ((vhba->cfg->tca_lid == msg->tca_lid) &&
+                   (vhba->cfg->tca_guid == msg->tca_guid)) {
+                       dprintk(TRC_XSMP_ERRS, vhba,
+                               "Received identical GUID and LID\n");
+                       goto out1;
+               }
+       }
+
+       if (msg->bit_mask & VHBA_XT_STATE_DOWN) {
+               dprintk(TRC_XSMP, NULL, "XT state DOWN received.\n");
+               vhba_xsmp_stats.xt_state_dn_cnt++;
+               vhba_xsmp_notify(xsmp_hndl, vhba->resource_id,
+                                XSMP_VHBA_OPER_DOWN);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+       } else if (msg->bit_mask & VHBA_XT_INFO_CHANGE) {
+               atomic_set(&vhba->vhba_state, VHBA_STATE_BUSY);
+               dprintk(TRC_XSMP, vhba,
+                       "Received new TCA LID and GUID. Reconnecting QPs with new IB info\n");
+               vhba_xsmp_stats.tca_lid_changed_cnt++;
+
+               vhba->cfg->tca_lid = msg->tca_lid;
+               vhba->cfg->tca_guid = msg->tca_guid;
+
+               vhba->ctrl_conn.ctx.dguid = be64_to_cpu(msg->tca_guid);
+               vhba->data_conn.ctx.dguid = be64_to_cpu(msg->tca_guid);
+               vhba->ctrl_conn.ctx.dlid = be16_to_cpu(msg->tca_lid);
+               vhba->data_conn.ctx.dlid = be16_to_cpu(msg->tca_lid);
+
+               vhba_purge_pending_ios(vhba);
+               atomic_set(&ha->qp_status, VHBA_QP_TRYCONNECTING);
+       } else if (msg->bit_mask & VHBA_LDT_CHANGED) {
+               dprintk(TRC_XSMP, vhba,
+                       "bit mask is %08x, Update IB timer=%d\n",
+                       msg->bit_mask, msg->linkdowntimeout);
+               vhba->cfg->linkdowntimeout = msg->linkdowntimeout;
+       }
+out1:
+       DEC_REF_CNT(vhba);
+out:
+       return ret;
+}
+
+void vhba_xsmp_modify(struct work_struct *work)
+{
+       struct xsvhba_work *xwork =
+           container_of(work, struct xsvhba_work, work);
+
+       struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+       int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+       int ret = 0;
+
+       ret = vhba_update(xwork->xsmp_hndl, msg);
+
+       if (!ret)
+               vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length);
+       else
+               vhba_xsmp_nack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length,
+                              VHBA_NACK_GENERAL_ERROR);
+
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+static void vhba_xsmp_delete(struct work_struct *work)
+{
+       struct xsvhba_work *xwork =
+           container_of(work, struct xsvhba_work, work);
+
+       struct vhba_xsmp_msg *msg = (struct vhba_xsmp_msg *)xwork->msg;
+       int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+       int ret = 0;
+
+       dprintk(TRC_INFO, NULL, "DELETE received for vhba:vid %s:0x%Lx\n",
+               msg->vh_name, msg->resource_id);
+       ret = vhba_delete(msg->resource_id);
+       if (ret == -EIO) {
+               dprintk(TRC_XSMP, NULL,
+                       "delete failed, device busy, sending NACK\n");
+               vhba_xsmp_nack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length,
+                              VHBA_NACK_DEVICE_BUSY);
+       } else {
+               vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg, vhba_xsmp_length);
+               dprintk(TRC_XSMP, NULL, "sent ack\n");
+       }
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+static void vhba_xsmp_boot_msg(struct work_struct *work)
+{
+       struct xsvhba_work *xwork =
+           container_of(work, struct xsvhba_work, work);
+
+       struct vhba_boot_info *msg = (struct vhba_boot_info *)xwork->msg;
+       struct virtual_hba *vhba;
+       struct scsi_xg_vhba_host *ha = NULL;
+       struct os_tgt *tq = NULL;
+       int i, x = 0;
+       int tgt;
+       union xg_tgt_wwpn boot_xwwpn;
+       union xg_tgt_wwpn mount_xwwpn;
+
+       vhba = vhba_get_context_by_resource_id(msg->resource_id);
+
+       if (vhba == NULL)
+               goto out;
+
+       ha = vhba->ha;
+
+       ha->boot_count = msg->boot_count;
+       ha->mount_count = msg->mount_count;
+       ha->mount_type = msg->mount_type;
+
+       dprintk(TRC_XSMP, vhba,
+               "Boot count = %d\tMount count = %d\tMount type = %d\n",
+               ha->boot_count, ha->mount_count, ha->mount_type);
+
+       for (i = 0; i < ha->boot_count; i++) {
+               memset(&ha->sanboot[i], 0,
+                      sizeof(struct host_san_vhba_list_sts));
+
+               memcpy(ha->sanboot[i].vh_name, msg->boot_devlist[i].vh_name,
+                      VHBA_NAME_LEN);
+               ha->sanboot[i].wwn = msg->boot_devlist[i].wwn;
+               ha->sanboot[i].lun = msg->boot_devlist[i].lun;
+
+               for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+                       tq = TGT_Q(ha, tgt);
+                       if (!tq)
+                               continue;
+                       else {
+                               for (x = 0; x < WWN_SIZE; x++)
+                                       boot_xwwpn.wwpn_t[7 - x] =
+                                           tq->fcport->port_name[x];
+                       }
+                       dprintk(TRC_XSMP, NULL,
+                               "Boot (local target WWN)  WWN = %Lx\n",
+                               boot_xwwpn.wwpn_val);
+
+                       if (tq && (boot_xwwpn.wwpn_val == ha->sanboot[i].wwn)) {
+                               dprintk(TRC_XSMP, NULL,
+                                       "Found a wwn match for a valid trgt\n");
+                               ha->sanboot[i].tgt_num =
+                                   tq->fcport->os_target_id;
+                       }
+               }
+
+               dprintk(TRC_XSMP, vhba, "Boot device # %d\n", i);
+               dprintk(TRC_XSMP, vhba,
+                       "vh_name: %s\tWWPN:0x%llx\t Lun: 0x%x\n",
+                       ha->sanboot[i].vh_name, ha->sanboot[i].wwn,
+                       ha->sanboot[i].lun);
+       }
+
+       for (i = 0; i < ha->mount_count; i++) {
+               memcpy(&ha->sanmount[i].vh_name,
+                      &msg->mount_devlist[i].vh_name, VHBA_NAME_LEN);
+               ha->sanmount[i].wwn = msg->mount_devlist[i].wwn;
+               ha->sanmount[i].lun = msg->mount_devlist[i].lun;
+
+               for (tgt = 0; tgt < MAX_TARGETS; tgt++) {
+                       tq = TGT_Q(ha, tgt);
+                       if (!tq)
+                               continue;
+                       else {
+                               for (x = 0; x < WWN_SIZE; x++)
+                                       mount_xwwpn.wwpn_t[7 - x] =
+                                           tq->fcport->port_name[x];
+                       }
+                       dprintk(TRC_XSMP, NULL,
+                               "Mount(local target WWN)  WWN = %Lx\n",
+                               mount_xwwpn.wwpn_val);
+
+                       if (tq &&
+                       (mount_xwwpn.wwpn_val == ha->sanmount[i].wwn)) {
+                               ha->sanmount[i].tgt_num =
+                                       tq->fcport->os_target_id;
+
+                               dprintk(TRC_XSMP, NULL,
+                                       "Found a wwpn match for a valid trgt. Tgt id = %d (%d)\n",
+                                       ha->sanmount[i].tgt_num,
+                                       tq->fcport->os_target_id);
+                       }
+
+               }
+
+               dprintk(TRC_XSMP, vhba, "Mount device # %d\n", i);
+               dprintk(TRC_XSMP, vhba,
+                       "vh_name: %s\tWWPN:0x%Lx\tLun: 0x%x\n",
+                       (char *)ha->sanmount[i].vh_name, ha->sanmount[i].wwn,
+                       ha->sanmount[i].lun);
+       }
+
+       if (ha->mount_type == 1) {
+               memcpy(ha->host_lvm_info.logical_vol_group,
+                      msg->logical_vol_group, VHBA_LVM_NAME_LEN);
+
+               memcpy(ha->host_lvm_info.logical_vol, msg->logical_vol,
+                      VHBA_LVM_NAME_LEN);
+
+               dprintk(TRC_XSMP, vhba,
+                       "Msg:   Logical vol group: %s\tLogical vol = %s\n",
+                       msg->logical_vol_group, msg->logical_vol);
+
+       } else if (ha->mount_type == 2) {
+               memcpy(ha->direct_mount_dev, msg->direct_mount_dev,
+                      VHBA_LVM_NAME_LEN);
+
+               dprintk(TRC_XSMP, NULL, "Direct mount device = %s\n",
+                       (char *)ha->direct_mount_dev);
+       }
+
+       memcpy(ha->mount_options, msg->mount_options, VHBA_MOUNT_OPT_LEN);
+
+       dprintk(TRC_XSMP, NULL, "Mount options = %s\n",
+               (char *)ha->mount_options);
+
+       vhba_xsmp_ack(xwork->xsmp_hndl, (u8 *) msg,
+                     sizeof(struct vhba_boot_info));
+
+       DEC_REF_CNT(vhba);
+out:
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+/*   The interface function used by the XCPM to deliver messages */
+static int vhba_xsmp_msg_handler(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+       struct xsvhba_work *vhba_work;
+       void *xsmp_msg;
+       u8 *msg;
+       int type = 0;
+       int boot_type;
+       int ret = 0;
+
+       dprintk(TRC_XSMP, NULL, "New message, length <%d>\n", length);
+
+       if (length < sizeof(struct xsmp_message_header)) {
+               eprintk(NULL, "Error - Message too short. Returning -1\n");
+               ret = -1;
+               goto out;
+       }
+
+       if (vhba_swap_bytes(N_TO_H, data)) {
+               eprintk(NULL,
+                       "Errors in the received message, dropping it. Returning -1\n");
+               ret = -1;
+               goto out;
+       }
+
+       if (*(u8 *) data != XSMP_MESSAGE_TYPE_VHBA) {
+               eprintk(NULL,
+                       "Error - Wrong message type, not a VHBA message. Returning -1\n");
+               ret = -1;
+               goto out;
+       }
+
+       if (*(u16 *) (data + 2) != length) {
+               dprintk(TRC_XSMP, NULL,
+                       "Warning - lengths are not the same, header: 0x%x, actual: 0x%x\n",
+                       *(u16 *) (data + 2), length);
+       }
+
+       msg = data + sizeof(struct xsmp_message_header);
+       length -= sizeof(struct xsmp_message_header);
+
+       boot_type = *msg;
+
+       /*
+        * Allocate the work item only after the message has been validated,
+        * so the error paths above do not leak it.
+        */
+       vhba_work = kmalloc(sizeof(struct xsvhba_work), GFP_ATOMIC);
+       if (!vhba_work) {
+               eprintk(NULL, "vhba_work kmalloc failed\n");
+               ret = -1;
+               goto out;
+       }
+
+       if (boot_type == XSMP_VHBA_BOOT_INFO)
+               xsmp_msg = kmalloc(sizeof(struct vhba_boot_info), GFP_ATOMIC);
+       else
+               xsmp_msg = kmalloc(sizeof(struct vhba_xsmp_msg), GFP_ATOMIC);
+
+       if (!xsmp_msg) {
+               eprintk(NULL, "xsmp msg kmalloc failed\n");
+               kfree(vhba_work);
+               ret = -1;
+               goto out;
+       }
+
+       if (boot_type == XSMP_VHBA_BOOT_INFO)
+               memcpy(xsmp_msg, msg, sizeof(struct vhba_boot_info));
+       else
+               memcpy(xsmp_msg, msg, sizeof(struct vhba_xsmp_msg));
+
+       type = *(u8 *) xsmp_msg;
+       vhba_work->xsmp_hndl = xsmp_hndl;
+       vhba_work->msg = xsmp_msg;
+       vhba_work->len = length;
+       vhba_xsmp_stats.last_msg = type;
+
+       vhba_handle_xsmp_msg(type, vhba_work);
+
+out:
+       kfree(data);
+       return ret;
+}
+
+/* The interface functions exported to the XCPM as callbacks */
+void vhba_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+       vhba_xsmp_msg_handler(xsmp_hndl, data, length);
+}
+
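+/*
+ * In-place byte-order conversion of an XSMP message.  The fixed header
+ * (type, length, sequence number) is swapped first; the payload is then
+ * swapped field by field according to the message type.
+ */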
+static int vhba_swap_bytes(int direction, u8 *msg)
+{
+       int rem_length = 0;
+       int vhba_xsmp_length = sizeof(struct vhba_xsmp_msg);
+       int num_messages = 0;
+       int count = 0;
+       int type = 0;
+       int i = 0;
+
+       if (direction == N_TO_H && (*(u8 *) msg == XSMP_MESSAGE_TYPE_VHBA)) {
+               rem_length = ntohs(*(u16 *) (msg + 2));
+       } else if (direction == H_TO_N &&
+                  (*(u8 *) msg == XSMP_MESSAGE_TYPE_VHBA)) {
+               rem_length = *(u16 *) (msg + 2);
+       } else {
+               eprintk(NULL,
+                       "Error - Hdr type not of a local msg. Returning -1\n");
+               return -1;
+       }
+
+       if (direction == H_TO_N) {
+               dprintk(TRC_XSMP, NULL,
+                       "Sending message: type <0x%x>, length <0x%x>\n",
+                       *(u16 *) (msg), *(u16 *) (msg + 2));
+       }
+
+       if (direction == N_TO_H) {
+               dprintk(TRC_XSMP, NULL,
+                       "Message received: XSMP type <0x%x>, length <0x%x>, sequence_number <0x%x>\n",
+                       *(u8 *) (msg),
+                       htons(*(u16 *) (msg + 2)),
+                       htonl(*(u32 *) (msg + 4)));
+       }
+
+       /* Swizzle the header first */
+       msg += 2;               /* Type */
+       *(u16 *) msg = htons(*(u16 *) msg);     /* Length */
+       msg += 2;
+       *(u32 *) msg = htonl(*(u32 *) msg);     /* Sequence number */
+       msg += 4;
+
+       /* Skip the source and destination IDs */
+       msg += 24;
+
+       rem_length -= sizeof(struct xsmp_message_header);
+
+       dprintk(TRC_XSMP, NULL,
+               "Msg payload length %d vhba_xsmp_length %d\n",
+               rem_length, vhba_xsmp_length);
+
+       type = *(u8 *) (msg);
+       if (type == XSMP_VHBA_STATS) {
+               struct _vhba_stats *pstats = (struct _vhba_stats *)msg;
+
+               dprintk(TRC_XSMP, NULL, "received a stats message\n");
+               if (direction == N_TO_H) {
+                       pstats->length = htons(pstats->length);
+                       dprintk(TRC_XSMP, NULL, "length %d\n", pstats->length);
+                       dprintk(TRC_XSMP, NULL,
+                               "vid before (%llX)\n", pstats->vid);
+                       pstats->vid = htonq(pstats->vid);
+                       dprintk(TRC_XSMP, NULL,
+                               "vid after (%llX)\n", pstats->vid);
+               } else if (direction == H_TO_N) {
+                       pstats->vid = htonq(pstats->vid);
+                       dprintk(TRC_XSMP, NULL,
+                               "vid exit (%llX)\n", pstats->vid);
+               }
+               dprintk(TRC_XSMP, NULL, "action = %d", pstats->action);
+               return 0;
+       }
+
+       if (type == XSMP_VHBA_BOOT_INFO) {
+               struct vhba_boot_info *pboot = (struct vhba_boot_info *)msg;
+
+               dprintk(TRC_XSMP, NULL, "received a boot message\n");
+               if (direction == N_TO_H) {
+
+                       pboot->length = ntohs(pboot->length);
+                       pboot->resource_id = ntohq(pboot->resource_id);
+                       pboot->boot_count = ntohs(pboot->boot_count);
+
+                       for (i = 0; i < pboot->boot_count; i++) {
+                               pboot->boot_devlist[i].wwn =
+                                   ntohq(pboot->boot_devlist[i].wwn);
+                               dprintk(TRC_XSMP, NULL,
+                                       "WWN = %llx (%Lx)\n",
+                                       pboot->boot_devlist[i].wwn,
+                                       pboot->boot_devlist[i].wwn);
+                               pboot->boot_devlist[i].lun =
+                                   ntohs(pboot->boot_devlist[i].lun);
+                               dprintk(TRC_XSMP, NULL, "lun  = %d\n",
+                                       pboot->boot_devlist[i].lun);
+                       }
+
+                       pboot->mount_type = ntohs(pboot->mount_type);
+                       pboot->mount_count = ntohs(pboot->mount_count);
+
+                       for (i = 0; i < pboot->mount_count; i++) {
+                               dprintk(TRC_XSMP, NULL, "VHBA name = %s\n",
+                                       (char *)(pboot->mount_devlist[i].
+                                                vh_name));
+                               pboot->mount_devlist[i].wwn =
+                                   ntohq(pboot->mount_devlist[i].wwn);
+                               dprintk(TRC_XSMP, NULL, "WWN = %llx (%Lx)\n",
+                                       pboot->mount_devlist[i].wwn,
+                                       pboot->mount_devlist[i].wwn);
+                               pboot->mount_devlist[i].lun =
+                                   ntohs(pboot->mount_devlist[i].lun);
+                               dprintk(TRC_XSMP, NULL, "lun  = %d\n",
+                                       pboot->mount_devlist[i].lun);
+
+                       }
+               } else if (direction == H_TO_N) {
+                       dprintk(TRC_XSMP, NULL,
+                               "Host to network message. ");
+                       dprintk(TRC_XSMP, NULL, "Doing nothing for now\n");
+               }
+
+               return 0;
+       }
+
+       if (rem_length % vhba_xsmp_length != 0) {
+               eprintk(NULL,
+                       "Error - Incorrect length XSMP header and payload,");
+               eprintk(NULL, " input_size(%d) header (%d)\n",
+                       rem_length, vhba_xsmp_length);
+               return -1;
+       }
+
+       num_messages = rem_length / vhba_xsmp_length;
+
+       for (count = 0; count < num_messages; count++) {
+               struct vhba_xsmp_msg *payload = (struct vhba_xsmp_msg *)msg;
+
+               if (rem_length == 0)
+                       return 0;
+
+               payload->length = htons(payload->length);
+               payload->bit_mask = htonl(payload->bit_mask);
+               payload->resource_id = htonq(payload->resource_id);
+
+               payload->vhba_flag = htons(payload->vhba_flag);
+               payload->mtu = htonl(payload->mtu);
+               payload->tapesupport = htons(payload->tapesupport);
+               payload->bandwidth = htons(payload->bandwidth);
+               payload->interruptdelaytimer =
+                   htonl(payload->interruptdelaytimer);
+               payload->executionthrottle = htonl(payload->executionthrottle);
+               payload->scsiqueuedepth = htonl(payload->scsiqueuedepth);
+               payload->linkdowntimeout = htonl(payload->linkdowntimeout);
+
+               payload->adminstate = htonl(payload->adminstate);
+               payload->enabletargetreset = htonl(payload->enabletargetreset);
+               payload->maxlunspertarget = htonl(payload->maxlunspertarget);
+
+               msg += vhba_xsmp_length;
+       }
+       return 0;
+}
+
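+/*
+ * Dispatch a validated XSMP message to the VHBA work queue.  Each
+ * message type has its own work handler; unknown types are only
+ * counted, and the work item is freed here since no handler will
+ * ever consume it.
+ */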
+void vhba_handle_xsmp_msg(int type, struct xsvhba_work *vhba_work)
+{
+
+       switch (type) {
+       case XSMP_VHBA_INSTALL:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Received XSMP_VHBA_INSTALL msg\n");
+                       vhba_xsmp_stats.install_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_create);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_DELETE:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Received XSMP_VHBA_DELETE msg\n");
+                       vhba_xsmp_stats.delete_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_delete);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_UPDATE:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Received XSMP_VHBA_UPDATE msg\n");
+                       vhba_xsmp_stats.update_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_modify);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_STATS:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Received XSMP_VHBA_STATS msg\n");
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_stats_req);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       vhba_xsmp_stats.cfg_stats_msg_cnt++;
+                       break;
+               }
+
+       case XSMP_VHBA_SYNC_BEGIN:{
+                       dprintk(TRC_XSMP, NULL,
+                               "SYNC: Received XSMP_VHBA_SYNC_BEGIN msg\n");
+                       vhba_xsmp_stats.sync_begin_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_sync_begin);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_SYNC_END:{
+                       dprintk(TRC_XSMP, NULL,
+                               "SYNC: Received XSMP_VHBA_SYNC_END msg\n");
+                       vhba_xsmp_stats.sync_end_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_sync_end);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_OPER_REQ:{
+                       dprintk(TRC_XSMP, NULL,
+                               "SYNC: Received XSMP_VHBA_OPER_REQ\n");
+                       vhba_xsmp_stats.oper_req_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_handle_oper_req);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       case XSMP_VHBA_BOOT_INFO:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Received XSMP_VHBA_BOOT_INFO msg\n");
+                       vhba_xsmp_stats.boot_msg_cnt++;
+                       INIT_WORK(&vhba_work->work, vhba_xsmp_boot_msg);
+                       queue_work(vhba_workqueuep, &vhba_work->work);
+                       break;
+               }
+
+       default:{
+                       dprintk(TRC_XSMP, NULL,
+                               "Warning - Invalid session message. ");
+                       dprintk(TRC_XSMP, NULL, "Returning -1\n");
+                       vhba_xsmp_stats.unknown_msg_cnt++;
+                       vhba_xsmp_stats.last_unknown_msg = type;
+                       kfree(vhba_work);
+               }
+       }
+}
+
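+/*
+ * Allocate an idr handle for a new vhba and bind it to the resource
+ * id from the install message.  If a vhba with the same resource id
+ * already exists (e.g. replayed during a sync-begin), just clear its
+ * sync flag and return 0 (no new context needed); return 1 when a
+ * fresh context was created, or -1 on idr allocation failure.
+ */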
+int vhba_create_context(struct vhba_xsmp_msg *msg, struct virtual_hba *vhba)
+{
+       int idr;
+       int ret = 0;
+       unsigned long flags = 0;
+       struct virtual_hba *t_vhba;
+       u64 resource_id = msg->resource_id;
+
+       write_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry(t_vhba, &vhba_g.list, list) {
+               if (t_vhba->resource_id == resource_id) {
+                       /*
+                        * Already in the list, may have been due to sync-begin
+                        * operation. Reset the sync flag and return
+                        */
+                       dprintk(TRC_INFO, t_vhba,
+                               "vhba already in the list: vid 0x%Lx\n",
+                               t_vhba->resource_id);
+                       t_vhba->sync_needed = 0;
+                       ret = 0;
+                       goto out;
+               }
+       }
+
+       /* GFP_ATOMIC: called with vhba_global_lock held and IRQs off */
+       idr = idr_alloc(&vhba_idr_table, (void *)vhba, vhba_current_idr,
+                       vhba_current_idr + 1, GFP_ATOMIC);
+       if (idr < 0) {
+               ret = -1;
+               goto out;
+       }
+
+       vhba->idr = idr;
+       vhba->resource_id = resource_id;
+       vhba_current_idr = idr + 1;
+       ret = 1;
+
+out:
+       write_unlock_irqrestore(&vhba_global_lock, flags);
+       return ret;
+}
+
+void vhba_add_context(struct virtual_hba *vhba)
+{
+       unsigned long flags = 0;
+
+       atomic_inc(&vhba->ref_cnt);
+       write_lock_irqsave(&vhba_global_lock, flags);
+       list_add_tail(&vhba->list, &vhba_g.list);
+       write_unlock_irqrestore(&vhba_global_lock, flags);
+}
+
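+/*
+ * Look up and unlink a vhba by resource id.  The idr handle is
+ * released and the reference taken in vhba_add_context() dropped;
+ * returns the unlinked vhba, or NULL if no match was found.
+ */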
+struct virtual_hba *vhba_remove_context(u64 resource_id)
+{
+       struct virtual_hba *vhba = NULL;
+       unsigned long flags = 0;
+
+       write_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               if (vhba->resource_id == resource_id)
+                       goto out;
+       }
+       write_unlock_irqrestore(&vhba_global_lock, flags);
+       return NULL;
+out:
+       idr_remove(&vhba_idr_table, vhba->idr);
+       atomic_dec(&vhba->ref_cnt);
+       list_del(&vhba->list);
+       write_unlock_irqrestore(&vhba_global_lock, flags);
+       return vhba;
+}
+
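+/*
+ * Lookup helpers: both take a reference on the vhba they return, so
+ * callers must drop the ref_cnt reference when they are done with it.
+ */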
+struct virtual_hba *vhba_get_context_by_resource_id(u64 resource_id)
+{
+       struct virtual_hba *vhba = NULL;
+       unsigned long flags = 0;
+
+       read_lock_irqsave(&vhba_global_lock, flags);
+       list_for_each_entry(vhba, &vhba_g.list, list) {
+               if (vhba->resource_id == resource_id)
+                       goto out;
+       }
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+       return NULL;
+out:
+       atomic_inc(&vhba->ref_cnt);
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+       return vhba;
+}
+
+struct virtual_hba *vhba_get_context_by_idr(u32 idr)
+{
+       struct virtual_hba *vhba;
+       unsigned long flags = 0;
+
+       read_lock_irqsave(&vhba_global_lock, flags);
+       vhba = idr_find(&vhba_idr_table, idr);
+       if (vhba)
+               atomic_inc(&vhba->ref_cnt);
+
+       read_unlock_irqrestore(&vhba_global_lock, flags);
+       return vhba;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.h b/drivers/infiniband/ulp/xsigo/xsvhba/vhba_xsmp.h
new file mode 100644 (file)
index 0000000..d1dfe41
--- /dev/null
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __VHBA_XSMP_H__
+#define __VHBA_XSMP_H__
+
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+#include <linux/workqueue.h>
+
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "vhba_os_def.h"
+
+extern int vhba_xsmp_init(void);
+extern void vhba_xsmp_exit(void);
+extern struct fc_function_template vhba_transport_functions;
+
+#define XSMP_VHBA_INSTALL             1
+#define XSMP_VHBA_DELETE              2
+#define XSMP_VHBA_UPDATE              3
+#define XSMP_VHBA_ADMIN_UP            4
+#define XSMP_VHBA_ADMIN_DOWN          5
+#define XSMP_VHBA_OPER_UP             6
+#define XSMP_VHBA_OPER_DOWN           7
+#define XSMP_VHBA_OPER_READY          8
+#define XSMP_VHBA_STATS_REQ           9
+#define XSMP_VHBA_STATS              10
+#define XSMP_VHBA_SYNC_BEGIN         11
+#define XSMP_VHBA_SYNC_END           12
+#define XSMP_VHBA_INFO_REQUEST       13
+#define XSMP_VHBA_OPER_REQ           14
+#define XSMP_VHBA_BOOT_INFO          15
+#define XSMP_VHBA_TYPE_MAX           16
+
+#define VHBA_PORT_RATE_CHANGED        0x1
+#define VHBA_TAPE_SUPPORT_CHANGED     0x2
+#define VHBA_IDT_CHANGED              0x4
+#define VHBA_ET_CHANGED               0x8
+#define VHBA_SCSI_Q_DPTH_CHANGED      0x10
+#define VHBA_LDT_CHANGED              0x20
+#define VHBA_ADMINSTATE_CHANGED       0x100
+#define VHBA_TGT_RESET_CHANGED        0x40
+#define VHBA_LUNS_PER_TGT_CHANGED     0x80
+
+#define ADMINSTATE_DOWN               0x0
+#define ADMINSTATE_UP                 0x1
+
+#define MAX_NUM_LINKS                  32
+enum vhba_xsmp_error_codes {
+       VHBA_NACK_INVALID,      /* 0 */
+       VHBA_NACK_DUP_NAME,     /* 1 */
+       VHBA_NACK_DUP_VID,      /* 2 */
+       VHBA_NACK_LIMIT_REACHED,        /* 3 */
+       VHBA_NACK_ALLOC_ERROR,  /* 4 */
+       VHBA_NACK_INVALID_STATE,        /* 5 */
+       VHBA_NACK_DEVICE_BUSY,  /* 6 */
+
+       VHBA_NACK_INS_APP_TIMEOUT,      /* 7 */
+       VHBA_NACK_UNINST_APP_TIMEOUT,   /* 8 */
+       VHBA_NACK_INS_APP_ERROR,        /* 9 */
+       VHBA_NACK_UNINS_APP_ERROR,      /* 10 */
+       VHBA_NACK_GENERAL_ERROR,        /* 11 */
+       VHBA_NACK_LOCAL_DISABLED,       /* 12 */
+
+       VHBA_NACK_HA_GROUP_NAME_MISMATCH,       /* 13 */
+       VHBA_NACK_HA_MAC_ADDRESS_MISMATCH,      /* 14 */
+       VHBA_NACK_HA_MTU_SIZE_MISMATCH, /* 15 */
+
+       VHBA_NACK_LA_GROUP_NAME_MISMATCH,       /* 16 */
+       VHBA_NACK_LA_MAC_ADDRESS_MISMATCH,      /* 17 */
+       VHBA_NACK_LA_MTU_SIZE_MISMATCH, /* 18 */
+       VHBA_NACK_LA_POLICY_MISMATCH,   /* 19 */
+
+       VHBA_NACK_CODE_MAX,     /* 20 */
+};
+
+/* Ack and Nack sent out in the 'code' field */
+#define  XSMP_VHBA_ACK          (1 << 6)
+#define  XSMP_VHBA_NACK         (1 << 7)
+
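+/* Byte-order conversion directions used by vhba_swap_bytes() */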
+#define H_TO_N 0
+#define N_TO_H 1
+
+#define ntohq be64_to_cpu
+#define htonq cpu_to_be64
+#define VHBA_NAME_LEN               16
+#define VHBA_MAX_BOOT_DEV           6
+#define VHBA_MAX_MOUNT_DEV          6
+#define VHBA_LVM_NAME_LEN           128
+#define VHBA_MOUNT_OPT_LEN          32
+
+struct san_vhba_list_sts {
+       u8 vh_name[VHBA_NAME_LEN];
+       u64 wwn;
+       u16 lun;
+} __packed;
+
+struct vhba_boot_info {
+       /* standard header fields */
+       u8 type;
+       u8 code;
+       u16 length;
+
+       u64 resource_id;
+
+       /* Count of boot devices specified */
+       u16 boot_count;
+       struct san_vhba_list_sts boot_devlist[VHBA_MAX_BOOT_DEV];
+
+       u16 mount_type;         /* 1 = use logical vol group, 0 = use vhba */
+       u8 logical_vol_group[VHBA_LVM_NAME_LEN];
+       u8 logical_vol[VHBA_LVM_NAME_LEN];
+       u8 direct_mount_dev[VHBA_LVM_NAME_LEN];
+       u8 mount_options[VHBA_MOUNT_OPT_LEN];
+
+       u16 mount_count;        /* count of mount devices */
+       struct san_vhba_list_sts mount_devlist[VHBA_MAX_MOUNT_DEV];
+
+       /*
+        * Padding reserves bytes to keep the V* message size at 960.
+        * If you add new fields to this structure,
+        * adjust the padding accordingly.
+        */
+       u8 reserved[214];
+} __packed;
+
+struct vhba_xsmp_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bit_mask;
+
+                       u64 resource_id;
+                       u64 wwn;
+                       u64 tca_guid;
+
+                       u16 tca_lid;
+                       u16 vhba_flag;
+                       u32 bandwidth;
+
+                       u32 tapesupport;
+                       u32 interruptdelaytimer;
+
+                       u32 executionthrottle;
+                       u32 scsiqueuedepth;
+
+                       u32 linkdowntimeout;
+                       u32 adminstate;
+
+                       u32 enabletargetreset;
+                       u32 maxlunspertarget;
+
+                       u32 num_queues; /* Maximum 4  (0 .. 3) */
+                       u8 vm_index;
+                       u8 lunmask_enable;
+                       u16 tca_slot;
+
+                       u8 vh_name[VHBA_NAME_LEN];
+
+                       struct {
+                               /*
+                                * Right now only one target,
+                                * LUN combination per queue (default q 0)
+                                * Actual rates are used only in I/O card side
+                                */
+                               u8 target[WWN_SIZE];
+                               u32 lun;
+                       } __packed q_classification[MAX_VHBA_QUEUES];
+
+                       uint32_t mtu;
+
+               } __packed;
+               uint8_t bytes[512];
+       };
+} __packed;
+
+#define MAX_XSMP_MSG_SIZE sizeof(struct vhba_xsmp_msg)
+
+struct _vhba_stats_config_msg {
+       u8 type;
+       u8 code;
+       u16 length;
+
+       u32 data_class_id;
+       u32 collection_interval;
+       u32 updatesper_interval;
+       u32 updatefrequency;
+
+       /*
+        * Padding reserves bytes to keep the V* message size at 512.
+        * If you add new fields to this structure,
+        * adjust the padding accordingly.
+        */
+       u8 reserved[492];
+} __packed;
+
+union _stats_obj_union {
+       struct vhba_xsmp_msg gen_config;
+       struct _vhba_stats_config_msg stats_config;
+
+       /*
+        * Padding reserves bytes to keep the V* message size at 512.
+        * If you add new fields to this structure,
+        * adjust the padding accordingly.
+        */
+       u8 reserved[368];
+} __packed;
+
+struct _vhba_stats {
+       u8 type;                /* Stats type (MIMM stats id) */
+       u8 code;                /* NACK reason */
+       u16 length;
+       u8 action;              /* clear = 1, otherwise = get */
+       u8 reserv[3];
+       u64 vid;
+       u64 statscookie;
+       u64 totalio;
+       u64 readbytecount;
+       u64 writebytecount;
+       u64 outstandingrequestcount;
+       u64 iorequestcount;
+       u64 readrequestcount;
+       u64 writerequestcount;
+       u64 taskmanagementrequestcount;
+       u64 targetcount;
+       u64 luncount;
+       u64 xsmpxtdowncount;
+       u64 xsmpxtoperstaterequestcount;
+       u64 mapfmrcount;
+       u64 ummapfmrcount;
+       u64 usedmapfmrcount;
+       u64 abortcommandcount;
+       u64 resetluncommandcount;
+       u64 resettargetcommandcount;
+       u64 resetbuscommandcount;
+       u64 linkdowncount;
+       u64 discinfoupdatecount;
+       u64 targetlostcount;
+       u64 targetfoundcount;
+       u64 cqpdisconnectcount;
+       u64 dqpdisconnectcount;
+       u64 cqpibsenterrorcount;
+       u64 dqpibsenterrorcount;
+       u64 cqpibreceiveerrorcount;
+       u64 dqpibreceiverrrorcount;
+       u64 cqpibremotedisconnecterrorcount;
+       u64 dqpibremotedisconnecterrorcount;
+
+       /*
+        * Padding reserves bytes to keep the V* message size at 512.
+        * If you add new fields to this structure,
+        * adjust the padding accordingly.
+        */
+       u8 reserved[240];
+} __packed;
+
+struct vhba_wq_msg {
+       struct work_struct *work;
+       u32 idr;
+       void *data;
+       struct ib_link_info *link;
+};
+
+extern void vhba_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *data,
+                                int length);
+extern void vhba_abort_handler(xsmp_cookie_t xsmp_hndl);
+
+extern int vhba_xsmp_service_id;
+
+int vhba_create(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg);
+int vhba_delete(u64 resource_id);
+int vhba_update(xsmp_cookie_t xsmp_hndl, struct vhba_xsmp_msg *msg);
+int vhba_config_stats(xsmp_cookie_t xsmp_hndl,
+                     union _stats_obj_union *vhba_stats_cfg);
+
+int vhba_create_context(struct vhba_xsmp_msg *, struct virtual_hba *);
+void vhba_add_context(struct virtual_hba *);
+struct virtual_hba *vhba_remove_context(u64);
+struct virtual_hba *vhba_get_context_by_idr(u32);
+struct virtual_hba *vhba_get_context_by_resource_id(u64);
+int vhba_check_context(struct virtual_hba *);
+
+#define VHBA_XT_STATE_DOWN (0x40000000)
+#define VHBA_XT_INFO_CHANGE (0x80000000)
+
+extern int vhba_debug;
+extern unsigned long vhba_wait_time;
+extern struct vhba_xsmp_stats vhba_xsmp_stats;
+
+extern void vhba_xsmp_stats_req(struct work_struct *work);
+extern int vhba_xsmp_notify(xsmp_cookie_t xsmp_hndl, u64 resource_id,
+                           int notifycmd);
+extern int vhba_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, u8 *data, int length);
+extern int vhba_xsmp_ack(xsmp_cookie_t xsmp_hndl, u8 *data, int length);
+int vhba_xsmp_nack(xsmp_cookie_t xsmp_hndl, u8 *data, int length,
+                  enum vhba_xsmp_error_codes);
+int stop_stats_collection(void);
+int insert_iocb(struct virtual_hba *, int val, void **r_ptr);
+extern int vhba_register_xsmp_service(void);
+extern void vhba_unregister_xsmp_service(void);
+extern void vhba_handle_xsmp_msg(int, struct xsvhba_work *vhba_work);
+
+#define DEBUG 1
+
+#define TRC_ERRORS             0x000001
+#define TRC_INIT               0x000002
+#define TRC_XSMP               0x000004
+#define TRC_XSMP_ERRS          0x000008
+#define TRC_IB                 0x000010
+#define TRC_IB_ERRS            0x000020
+#define TRC_SCSI               0x000040
+#define TRC_SCSI_ERRS          0x000080
+#define TRC_FMR                        0x000100
+#define TRC_FMR_ERRS           0x000200
+#define TRC_IO                 0x000400
+#define TRC_UNALIGNED          0x000800
+#define TRC_PROC               0x001000
+#define TRC_ERR_RECOV          0x002000
+#define TRC_TIMER              0x004000
+#define TRC_CQP                        0x008000
+#define TRC_SCAN               0x010000
+#define TRC_MGMT               0x020000
+#define TRC_STATS              0x040000
+#define TRC_FUNCS              0x080000
+#define TRC_WQ                 0x100000
+#define TRC_INFO               0x200000
+
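+/*
+ * eprintk() always logs; dprintk()/vhba_debug() log only when the
+ * given TRC_* bit is set in the global vhba_debug mask.  All three
+ * prefix the vhba name when a vhba context is supplied.
+ */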
+#ifdef DEBUG
+#define eprintk(vhba, fmt, args...)                            \
+{                                                              \
+       struct virtual_hba *v_hba = (struct virtual_hba *)vhba; \
+       if (v_hba != NULL) {                                    \
+               if ((v_hba->cfg) && (v_hba->cfg->vh_name))      \
+                       pr_info("<vhba %s> %s: " fmt,   \
+                               (char *) (v_hba->cfg->vh_name), \
+                                __func__ , ## args);           \
+       } else {                                                \
+               pr_info("%s: " fmt, __func__ , ## args);                \
+       }                                                       \
+}
+#else
+#define eprintk(fmt, args...)
+#endif
+
+#ifdef DEBUG
+#define dprintk(level, vhba, fmt, args...)                             \
+do {                                                                   \
+       struct virtual_hba *v_hba = (struct virtual_hba *)vhba;         \
+       if ((vhba_debug & level) == level) {                            \
+               if (v_hba != NULL) {                                    \
+                       if ((v_hba->cfg) && (v_hba->cfg->vh_name))      \
+                               pr_info("<vhba %s> %s: " fmt,   \
+                                       (char *) (v_hba->cfg->vh_name), \
+                                       __func__ , ## args);            \
+               } else {                                                \
+                       pr_info("%s: " fmt, __func__, ## args);         \
+               }                                                       \
+       }                                                               \
+} while (0)
+
+#define vhba_debug(level, vhba, fmt, args...)                          \
+do {                                                                   \
+       struct virtual_hba *v_hba = (struct virtual_hba *)vhba;         \
+       if ((vhba_debug & level) == level) {                            \
+               if (v_hba != NULL) {                                    \
+                       if ((v_hba->cfg) && (v_hba->cfg->vh_name))      \
+                               pr_info("<vhba %s> %32s: " fmt,\
+                                       (char *)(v_hba->cfg->vh_name),  \
+                                       __func__ , ## args);            \
+               } else {                                                \
+                       pr_info("%s: " fmt, __func__, ## args);         \
+               }                                                       \
+       }                                                               \
+} while (0)
+#else
+#define dprintk(level, vhba, fmt, args...)
+#endif
+
+#define assert(expr)                                           \
+do {                                                           \
+       if (!(expr)) {                                          \
+               pr_info("Assertion failed! %s,%s,%s,line=%d\n", \
+                       #expr, __FILE__, __func__, __LINE__);   \
+       }                                                       \
+} while (0)
+
+int vhba_purge_pending_ios(struct virtual_hba *vhba);
+
+#endif /* __VHBA_XSMP_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/Kconfig b/drivers/infiniband/ulp/xsigo/xsvnic/Kconfig
new file mode 100644 (file)
index 0000000..0df39bd
--- /dev/null
@@ -0,0 +1,5 @@
+config INFINIBAND_XSVNIC
+        tristate "Xsigo Virtual NIC"
+        depends on INFINIBAND_XSCORE
+        ---help---
+          Support for the Xsigo vNIC functionality.
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/Makefile b/drivers/infiniband/ulp/xsigo/xsvnic/Makefile
new file mode 100644 (file)
index 0000000..2e47dad
--- /dev/null
@@ -0,0 +1,9 @@
+obj-$(CONFIG_INFINIBAND_XSVNIC) := xsvnic.o
+xsvnic-y := xsvnic_main.o xsvnic_stats.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic.h b/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic.h
new file mode 100644 (file)
index 0000000..520c8dc
--- /dev/null
@@ -0,0 +1,600 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSVNIC_H__
+#define __XSVNIC_H__
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/inet_lro.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+
+#include "xscore.h"
+#include "xsmp_common.h"
+#include "xsvnic_xsmp_msgs.h"
+#include "xsmp_session.h"
+
+#ifndef        XSIGO_LOCAL_VERSION
+#define XSVNIC_DRIVER_VERSION "0.31"
+#else
+#define XSVNIC_DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+#define XSVNIC_MIN_PACKET_LEN  60
+#define XSVNIC_MAX_BUF_SIZE    1024
+#define        XSVNIC_MACLIST_MAX      128
+#define TCA_SERVICE_ID         0x1001ULL
+#define        XSVNIC_VLANLIST_MAX     500
+#define XS_RXBAT_HDRLEN                4
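+/*
+ * Each rx-batched segment is preceded by a 4-byte header word:
+ * bits 31:30 hold the format/offset code, bit 29 marks the final
+ * segment of the batch, and bits 13:0 carry the fragment length.
+ */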
+#define RXBAT_FORMAT_OFFSET(a)  ((a >> 30) & 0x3)
+#define RXBAT_FINAL_BIT(a)     ((a >> 29) & 0x1)
+#define RXBAT_FRAG_LEN(a)      (a & 0x3fff)
+
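+/* Running max/min trackers feeding the XSVNIC_*_{MAX,MIN}_* counters */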
+#define  GET_MAX(val, len)                                             \
+       do {                                                            \
+               if ((val == 0) || ((len > val) && (len != 0)))          \
+                       val = len;                                      \
+       } while (0)
+
+#define  GET_MIN(val, len)                                             \
+       do {                                                            \
+               if ((val == 0) || ((len < val) && (len != 0)))          \
+                       val = len;                                      \
+       } while (0)
+
+#define CALC_MAX_PKT_RX(p, len)                                                \
+       do {                                                            \
+               GET_MAX(p->counters[XSVNIC_RX_MAX_PKT], len);           \
+               GET_MIN(p->counters[XSVNIC_RX_MIN_PKT], len);           \
+       } while (0)
+
+#define CALC_MAX_PKT_TX(p, len)                                                \
+       do {                                                            \
+               GET_MAX(p->counters[XSVNIC_TX_MAX_PKT], len);           \
+               GET_MIN(p->counters[XSVNIC_TX_MIN_PKT], len);           \
+       } while (0)
+
+#define CALC_MAX_MIN_TXTIME(p, time)                                   \
+       do {                                                            \
+               unsigned long tot_time = (jiffies - time);              \
+               GET_MAX(p->counters[XSVNIC_TX_MAX_TIME], tot_time);     \
+               GET_MIN(p->counters[XSVNIC_TX_MIN_TIME], tot_time);     \
+       } while (0)
+
+#define XSIGO_DUMP_PKT(a, b, c)                                                \
+       do {                                                            \
+               if (xsvnic_debug & DEBUG_DUMP_PKTS)                     \
+                       dumppkt(a, b, c);                               \
+       } while (0)
+
+#define        XSIGO_DEVICE_PREFIX     ""
+
+#define XSVNIC_IO_QP_TYPE_CONTROL      0
+#define XSVNIC_IO_QP_TYPE_DATA         1
+
+enum {
+       XSVNIC_CONN_INIT,
+       XSVNIC_CONN_CONNECTING,
+       XSVNIC_CONN_CONNECTED,
+       XSVNIC_CONN_DISCONNECTING,
+       XSVNIC_CONN_DISCONNECTED,
+       XSVNIC_CONN_ERROR
+};
+
+struct xsvnic_conn {
+       u8 type;
+       int state;
+       struct xscore_conn_ctx ctx;
+};
+
+/*
+ * Private data format passed in a connection request
+ */
+
+struct xt_cm_private_data {
+       u64 vid;
+       u16 qp_type;
+       u16 max_ctrl_msg_size;
+       u32 data_qp_type;
+#define        XSVNIC_TSO_BIT          (1 << 1)
+#define        XSVNIC_RXBAT_BIT        (1 << 2)
+#define        XSVNIC_RXBAT_TIMER_BIT  (1 << 3)
+} __packed;
+
+struct xsvnic_control_msg {
+       u8 type;
+       u8 _reserved;
+       u16 length;
+       u32 data;
+} __packed;
+
+/*lro specifics*/
+enum {
+       XSVNIC_MAX_LRO_DESCRIPTORS = 8,
+       XSVNIC_LRO_MAX_AGGR = 64,
+};
+
+/*
+ * Types for the control messages, events, and statistics
+ * sent using the 'struct xsvnic_control_msg' above
+ */
+enum xsvnic_control_msg_type {
+       XSVNIC_START_TX = 16,
+       XSVNIC_STOP_TX,
+       XSVNIC_START_RX,
+       XSVNIC_STOP_RX,
+       XSVNIC_RX_COALESCE_NUM_PACKETS,
+       XSVNIC_RX_COALESCE_MSECS,
+       XSVNIC_LINK_UP,
+       XSVNIC_LINK_DOWN,
+       XSVNIC_ASSIGN_IP,
+       XSVNIC_ASSIGN_VLAN,
+       XSVNIC_UNASSIGN_VLAN,
+       XSVNIC_STATS_REQUEST,
+       XSVNIC_STATS_RESPONSE,
+       XSVNIC_MAC_ADDRESS_REPORT,
+       XSVNIC_MULTICAST_LIST_SEND,
+       XSVNIC_START_RX_RESPONSE,
+       XSVNIC_VPORT_STATUS_UPDATE,
+       XSVNIC_MULTICAST_LIST_RESPONSE,
+       XSVNIC_HEART_BEAT,
+       MAX_XSVNIC_CTL_MSG_TYPE
+};
+
+struct xsvnic_start_rx_resp_msg {
+       u8 port_speed;
+};
+
+struct xsvnic_link_up_msg {
+       u8 port_speed;
+};
+
+enum xnic_bw {
+       XNIC_BW_0,              /* link down state */
+       XNIC_BW_100MbPS,
+       XNIC_BW_10MbPS,
+       XNIC_BW_200MbPS,
+       XNIC_BW_500MbPS,
+       XNIC_BW_800MbPS,
+       XNIC_BW_1GbPS,
+       XNIC_BW_2GbPS,
+       XNIC_BW_3GbPS,
+       XNIC_BW_4GbPS,
+       XNIC_BW_5GbPS,
+       XNIC_BW_6GbPS,
+       XNIC_BW_7GbPS,
+       XNIC_BW_8GbPS,
+       XNIC_BW_9GbPS,
+       XNIC_BW_10GbPS,
+       XNIC_BW_UNKNOWN,
+};
+
+struct vlan_entry {
+       struct list_head vlan_list;
+       unsigned short vlan_id;
+};
+
+enum {
+       XSVNIC_SYNC_END_DEL_COUNTER,
+       XSVNIC_VNIC_INSTALL_COUNTER,
+       XSVNIC_VNIC_DEL_COUNTER,
+       XSVNIC_VNIC_DEL_NOVID_COUNTER,
+       XSVNIC_VNIC_UPDATE_COUNTER,
+       XSVNIC_VNIC_SYNC_BEGIN_COUNTER,
+       XSVNIC_VNIC_SYNC_END_COUNTER,
+       XSVNIC_VNIC_OPER_REQ_COUNTER,
+       XSVNIC_VNIC_UNSUP_XSMP_COUNTER,
+       XSVNIC_ISCSI_INFO_COUNTER,
+       XSVNIC_DEVICE_REMOVAL_COUNTER,
+       XSVNIC_MAX_GLOB_COUNTERS
+};
+
+enum {
+       XSVNIC_CTRL_HBEAT_COUNTER,
+       XSVNIC_DATA_HBEAT_COUNTER,
+       XSVNIC_HBEAT_ERR_COUNTER,
+       XSVNIC_NAPI_POLL_COUNTER,
+       XSVNIC_SHORT_PKT_COUNTER,
+       XSVNIC_TX_COUNTER,
+       XSVNIC_TX_SKB_TSO_COUNTER,
+       XSVNIC_TX_SKB_NOHEAD_COUNTER,
+       XSVNIC_TX_SKB_FREE_COUNTER,
+       XSVNIC_TX_SKB_FREE_COUNTER_REAP,
+       XSVNIC_TX_EXPAND_HEAD_COUNTER,
+       XSVNIC_TX_EXPAND_HEAD_ECNTR,
+       XSVNIC_TX_VLAN_COUNTER,
+       XSVNIC_TX_ERROR_COUNTER,
+       XSVNIC_TX_WRB_EXHAUST,
+       XSVNIC_TX_DROP_OPER_DOWN_COUNT,
+       XSVNIC_TX_SKB_ALLOC_ERROR_COUNTER,
+       XSVNIC_TX_EXPANDSKB_ERROR,
+       XSVNIC_TX_RING_FULL_COUNTER,
+       XSVNIC_RX_SKB_COUNTER,
+       XSVNIC_RX_SKB_ALLOC_COUNTER,
+       XSVNIC_RX_SENDTO_VLANGRP,
+       XSVNIC_RXBAT_PKTS,
+       XSVNIC_RX_SKB_FREE_COUNTER,
+       XSVNIC_RX_MAXBATED_COUNTER,
+       XSVNIC_RXBAT_BELOW_5SEGS,
+       XSVNIC_RXBAT_BTW_5_10SEGS,
+       XSVNIC_RXBAT_BTW_10_20SEGS,
+       XSVNIC_RXBAT_ABOVE_20SEGS,
+       XSVNIC_8KBAT_PKTS,
+       XSVNIC_RX_SKB_OFFLOAD_COUNTER,
+       XSVNIC_RX_SKB_OFFLOAD_FRAG_COUNTER,
+       XSVNIC_RX_SKB_OFFLOAD_NONIPV4_COUNTER,
+       XSVNIC_RX_ERROR_COUNTER,
+       XSVNIC_RX_QUOTA_EXCEEDED_COUNTER,
+       XSVNIC_RX_NOBUF_COUNTER,
+       XSVNIC_RX_MAX_PKT,
+       XSVNIC_RX_MIN_PKT,
+       XSVNIC_RX_LRO_AGGR_PKTS,
+       XSVNIC_RX_LRO_FLUSHED_PKT,
+       XSVNIC_RX_LRO_AVG_AGGR_PKTS,
+       XSVNIC_RX_LRO_NO_DESCRIPTORS,
+       XSVNIC_TX_MAX_PKT,
+       XSVNIC_TX_MIN_PKT,
+       XSVNIC_TX_MAX_TIME,
+       XSVNIC_TX_MIN_TIME,
+       XSVNIC_NAPI_SCHED_COUNTER,
+       XSVNIC_NAPI_NOTSCHED_COUNTER,
+       XSVNIC_PORT_LINK_UP_COUNTER,
+       XSVNIC_PORT_LINK_DOWN_COUNTER,
+       XSVNIC_DUP_PORT_LINK_UP_COUNTER,
+       XSVNIC_DUP_PORT_LINK_DOWN_COUNTER,
+       XSVNIC_START_RX_COUNTER,
+       XSVNIC_STOP_RX_COUNTER,
+       XSVNIC_START_RX_RESP_COUNTER,
+       XSVNIC_BAD_RX_RESP_COUNTER,
+       XSVNIC_OPEN_COUNTER,
+       XSVNIC_STOP_COUNTER,
+       XSVNIC_GETSTATS_COUNTER,
+       XSVNIC_SET_MCAST_COUNTER,
+       XSVNIC_MCAST_LIST_RESP_COUNTER,
+       XSVNIC_MCAST_LIST_NORESP_COUNTER,
+       XSVNIC_VLAN_RX_ADD_COUNTER,
+       XSVNIC_VLAN_RX_DEL_COUNTER,
+       XSVNIC_IOCTL_COUNTER,
+       XSVNIC_MAC_ADDR_CHNG,
+       XSVNIC_WDOG_TIMEOUT_COUNTER,
+       XSVNIC_OPER_REQ_COUNTER,
+       XSVNIC_XT_DOWN_COUNTER,
+       XSVNIC_XT_UPDATE_COUNTER,
+       XSVNIC_XT_LID_CHANGE_COUNTER,
+       XSVNIC_ADMIN_UP_COUNTER,
+       XSVNIC_ADMIN_DOWN_COUNTER,
+       XSVNIC_OPER_UP_STATE_COUNTER,
+       XSVNIC_QP_ERROR_COUNTER,
+       XSVNIC_IB_RECOVERY_COUNTER,
+       XSVNIC_IB_RECOVERED_COUNTER,
+       XSVNIC_IBLINK_DOWN_COUNTER,
+       XSVNIC_IBLINK_UP_COUNTER,
+       XSVNIC_CTRL_CONN_OK_COUNTER,
+       XSVNIC_CTRL_RDISC_COUNTER,
+       XSVNIC_CTRL_ERR_COUNTER,
+       XSVNIC_CTRL_RECV_ERR_COUNTER,
+       XSVNIC_DATA_CONN_OK_COUNTER,
+       XSVNIC_DATA_RDISC_COUNTER,
+       XSVNIC_DATA_ERR_COUNTER,
+       XSVNIC_SENT_OPER_UP_COUNTER,
+       XSVNIC_SENT_OPER_DOWN_COUNTER,
+       XSVNIC_SENT_OPER_STATE_FAILURE_COUNTER,
+       XSVNIC_SENT_OPER_STATE_SUCCESS_COUNTER,
+       XSVNIC_RX_DROP_STANDBY_COUNTER,
+       XSVNIC_TX_DROP_STANDBY_COUNTER,
+       XSVNIC_MAX_COUNTERS
+};
+
+struct ether_addr {
+       unsigned char addr[ETH_ALEN];
+};
+
+struct xsvnic_lro {
+       struct net_lro_mgr lro_mgr;
+       struct net_lro_desc lro_desc[XSVNIC_MAX_LRO_DESCRIPTORS];
+};
+
+struct xsvnic {
+       spinlock_t lock;
+       struct mutex mutex;
+       atomic_t ref_cnt;
+       struct completion done;
+       struct delayed_work sm_work;
+       unsigned long state;
+#define        XSVNIC_SYNC_DIRTY               1
+#define        XSVNIC_OS_ADMIN_UP              2
+#define        XSVNIC_CHASSIS_ADMIN_UP         3
+#define        XSVNIC_DELETING                 4
+#define        XSVNIC_SEND_ADMIN_STATE         5
+#define        XSVNIC_PORT_LINK_UP             6
+#define        XSVNIC_START_RX_SENT            7
+#define        XSVNIC_START_RESP_RCVD          8
+#define        XSVNIC_OPER_UP                  9
+#define        XSVNIC_STOP_RX_SENT             10
+#define        XSVNIC_XT_DOWN                  11
+#define        XSVNIC_XT_STATE_CHANGE          12
+#define        XSVNIC_SHUTDOWN                 13
+#define        XSVNIC_MCAST_LIST_SENT          14
+#define        XSVNIC_RING_SIZE_CHANGE         15
+#define        XSVNIC_RX_NOBUF                 16
+#define        XSVNIC_INTR_ENABLED             17
+#define        XSVNIC_TRIGGER_NAPI_SCHED       18
+#define        XSVNIC_IBLINK_DOWN              19
+#define        XSVNIC_MCAST_LIST_PENDING       20
+#define        XSVNIC_MCAST_LIST_TIMEOUT       21
+#define        XSVNIC_CHASSIS_ADMIN_SHADOW_UP  22
+#define        XSVNIC_OVER_QUOTA               23
+#define        XSVNIC_TSO_CHANGE               24
+#define        XSVNIC_RXBATCH_CHANGE           25
+#define        XSVNIC_STATE_STDBY              26
+       struct list_head xsvnic_list;
+       struct list_head vlan_list;
+       struct ether_addr *mc_addrs;
+       int mc_count;
+       struct net_device *netdev;
+       struct net_device_stats stats;
+       struct napi_struct napi;
+       u8 lro_mode;
+       struct xsvnic_lro lro;
+#define        XSVNIC_RECLAIM_COUNT    4
+       int reclaim_count;
+       u8 send_hbeat_flag;
+       int vlan_count;
+       xsmp_cookie_t xsmp_hndl;
+       u64 tca_guid;
+       u16 tca_lid;
+       struct xsvnic_conn ctrl_conn;
+       struct xsvnic_conn data_conn;
+       u32 counters[XSVNIC_MAX_COUNTERS];
+       u64 resource_id;
+       u32 bandwidth;
+       u32 mtu;
+       u64 mac;
+       char vnic_name[XSVNIC_MAX_NAME_SIZE];
+       u8 sl;
+       u16 mp_flag;
+       u8 mp_group[XSVNIC_MAX_NAME_SIZE];
+       u32 install_flag;
+       int port_speed;
+       struct xsmp_session_info xsmp_info;
+       struct xsvnic_iscsi_info iscsi_boot_info;
+       u8 ha_state;
+       int rx_ring_size;
+       int tx_ring_size;
+       int *budget;
+       unsigned long jiffies;
+       int sm_delay;
+       u8 iff_promisc;
+       u16 counters_cleared;
+       int page_order;
+       int is_tso;
+       int is_rxbatching;
+       int is_rxbat_operational;
+       struct vlan_group *vlgrp;
+       struct proc_dir_entry *vnic_dir;
+       int ix;
+};
+
+struct xsvnic_work {
+       struct work_struct work;
+       xsmp_cookie_t xsmp_hndl;
+       struct xsvnic *xsvnicp;
+       u8 *msg;
+       int len;
+       int status;
+};
+
+extern int xsvnic_debug;
+extern unsigned long xsvnic_wait_time;
+extern struct mutex xsvnic_mutex;
+extern struct list_head xsvnic_list;
+extern u32 xsvnic_counters[];
+extern int xsvnic_vlanaccel;
+
+extern void xsvnic_remove_procfs_root_entries(void);
+extern int xsvnic_create_procfs_root_entries(void);
+extern int xsvnic_add_proc_entry(struct xsvnic *vp);
+extern void xsvnic_remove_proc_entry(struct xsvnic *vp);
+extern int xsvnic_change_rxbatch(struct xsvnic *xsvnicp, int flag);
+
+extern int check_rxbatch_possible(struct xsvnic *xsvnicp, int flag);
+void xsvnic_count_segs(struct xsvnic *xsvnicp, char nr_segs, int pkt_len);
+int xsvnic_align_addr(char **start);
+void xsvnic_send_skb(struct xsvnic *xsvnicp, struct sk_buff *skb,
+                    int curr_pkt_len, char chksum_offload);
+
+#define MODULE_NAME "XSVNIC"
+
+enum {
+       DEBUG_DRV_INFO = 0x00000001,
+       DEBUG_DRV_FUNCTION = 0x00000002,
+       DEBUG_XSMP_INFO = 0x00000004,
+       DEBUG_XSMP_FUNCTION = 0x00000008,
+       DEBUG_IOCTRL_INFO = 0x00000010,
+       DEBUG_IOCTRL_FUNCTION = 0x00000020,
+       DEBUG_RXBAT_FUNCTION = 0x00000040,
+       DEBUG_DUMP_PKTS = 0x00000080,
+};
+
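+/*
+ * Hex-dump a packet to the log, eight "xx:" bytes per line; invoked
+ * via the XSIGO_DUMP_PKT() wrapper when DEBUG_DUMP_PKTS is enabled.
+ */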
+static inline void dumppkt(unsigned char *pkt, unsigned short len, char *name)
+{
+       int i;
+       unsigned char *p = (unsigned char *)pkt;
+       char line[64] = { 0 };
+       char *cp = line;
+       char filter[] = "0123456789abcdef";
+
+       pr_info("%s DumpPacket of %d\n", name, len);
+
+       for (i = 0; i < len; i++) {
+               if ((i != 0) && (i % 8 == 0)) {
+                       pr_info("%s\n", line);
+                       memset(line, 0, sizeof(line));
+                       cp = line;
+               }
+               *cp++ = filter[*p >> 4];
+               *cp++ = filter[*p++ & 0xf];
+               *cp++ = ':';
+       }
+       /* Flush the final (possibly partial) line, minus its trailing ':' */
+       if (cp != line) {
+               *--cp = 0;
+               pr_info("%s\n", line);
+       }
+}
+
+#define PRINT(level, x, fmt, arg...)                           \
+       printk(level "%s: " fmt, MODULE_NAME, ##arg)
+
+#define PRINT_CONDITIONAL(level, x, condition, fmt, arg...)    \
+       do {                                                    \
+               if (condition)                                  \
+                       printk(level "%s: %s: "fmt,             \
+                               MODULE_NAME, x, ##arg);         \
+       } while (0)
+
+#define DRV_PRINT(fmt, arg...)                                 \
+       PRINT(KERN_INFO, "DRV", fmt, ##arg)
+#define DRV_ERROR(fmt, arg...)                                 \
+       PRINT(KERN_ERR, "DRV", fmt, ##arg)
+
+#define DRV_FUNCTION(fmt, arg...)                              \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "DRV",                                  \
+                       (xsvnic_debug & DEBUG_DRV_FUNCTION),    \
+                       fmt, ##arg)
+
+#define DRV_INFO(fmt, arg...)                                  \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "DRV",                                  \
+                       (xsvnic_debug & DEBUG_DRV_INFO),        \
+                       fmt, ##arg)
+
+#define XSMP_PRINT(fmt, arg...)                                        \
+       PRINT(KERN_INFO, "XSMP", fmt, ##arg)
+#define XSMP_ERROR(fmt, arg...)                                        \
+       PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+
+#define XSMP_FUNCTION(fmt, arg...)                             \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XSMP",                                 \
+                       (xsvnic_debug & DEBUG_XSMP_FUNCTION),   \
+                       fmt, ##arg)
+
+#define XSMP_INFO(fmt, arg...)                                 \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "XSMP",                                 \
+                       (xsvnic_debug & DEBUG_XSMP_INFO),       \
+                       fmt, ##arg)
+#define IOCTRL_PRINT(fmt, arg...)                              \
+       PRINT(KERN_INFO, "IOCTRL", fmt, ##arg)
+#define IOCTRL_ERROR(fmt, arg...)                              \
+       PRINT(KERN_ERR, "IOCTRL", fmt, ##arg)
+
+#define IOCTRL_FUNCTION(fmt, arg...)                           \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "IOCTRL",                               \
+                       (xsvnic_debug & DEBUG_IOCTRL_FUNCTION), \
+                       fmt, ##arg)
+
+#define IOCTRL_INFO(fmt, arg...)                               \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "IOCTRL",                               \
+                       (xsvnic_debug & DEBUG_IOCTRL_INFO),     \
+                       fmt, ##arg)
+#define IORXBAT_FUNC(fmt, arg...)                              \
+       PRINT_CONDITIONAL(KERN_INFO,                            \
+                       "RXBAT",                                \
+                       (xsvnic_debug & DEBUG_RXBAT_FUNCTION),  \
+                       fmt, ##arg)
+
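+/*
+ * Copy the netdev multicast list into the vnic's flat mc_addrs
+ * array.  Older kernels expose multicast entries as dev_mc_list
+ * (dmi_addr); newer ones use netdev_hw_addr, hence the two variants.
+ */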
+#if !defined(NETDEV_HW_ADDR_T_MULTICAST)
+
+static inline void netdev_mc_list_copy(struct xsvnic *xsvnicp)
+{
+       struct dev_mc_list *ha;
+       struct net_device *netdev = xsvnicp->netdev;
+       struct ether_addr *eaddr = xsvnicp->mc_addrs;
+
+       netdev_for_each_mc_addr(ha, netdev) {
+               ether_addr_copy(eaddr->addr, ha->dmi_addr);
+               eaddr++;
+       }
+}
+
+#else
+
+static inline void netdev_mc_list_copy(struct xsvnic *xsvnicp)
+{
+       struct netdev_hw_addr *ha;
+       struct net_device *netdev = xsvnicp->netdev;
+       struct ether_addr *eaddr = xsvnicp->mc_addrs;
+
+       netdev_for_each_mc_addr(ha, netdev) {
+               ether_addr_copy(eaddr->addr, ha->addr);
+               eaddr++;
+       }
+}
+
+#endif
+
+struct xs_vlan_header {
+       u32 tso_info;
+} __packed;
+
+struct xs_tso_header {
+       u32 tso_info;
+} __packed;
+
+struct xs_tsovlan_header {
+       u32 tso_info;
+       u32 vlan_info;
+} __packed;
+
+#endif /* __XSVNIC_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_main.c b/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_main.c
new file mode 100644 (file)
index 0000000..c53366c
--- /dev/null
@@ -0,0 +1,3336 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include <linux/mii.h>
+#include <linux/tcp.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
+#include "xsvnic.h"
+#include "xscore.h"
+#include <xs_compat.h>
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN XSVNIC network driver");
+MODULE_VERSION(XSVNIC_DRIVER_VERSION);
+MODULE_LICENSE("Dual BSD/GPL");
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO        NETIF_F_SW_LRO
+#endif
+static int napi_weight = 64;
+module_param(napi_weight, int, 0644);
+
+static int xsigo_session_service_id = -1;
+static int xsvnic_havnic = 1;
+module_param(xsvnic_havnic, int, 0644);
+
+int xsvnic_debug = 0x0;
+module_param(xsvnic_debug, int, 0644);
+
+static int xsvnic_force_csum_offload = 0x0;
+module_param(xsvnic_force_csum_offload, int, 0644);
+
+/*lro specifics*/
+int lro;
+static int lro_max_aggr = XSVNIC_LRO_MAX_AGGR;
+module_param(lro, int, 0444);
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
+MODULE_PARM_DESC(lro_max_aggr,
+                "LRO: Max packets to be aggregated (default = 64)");
+
+static int multicast_list_disable;
+module_param(multicast_list_disable, int, 0644);
+
+static int xsvnic_hbeat_enable = 2;
+module_param(xsvnic_hbeat_enable, int, 0644);
+
+int xsvnic_rxring_size = 256;
+module_param(xsvnic_rxring_size, int, 0444);
+
+int xsvnic_txring_size = 256;
+module_param(xsvnic_txring_size, int, 0444);
+
+int xsvnic_highdma;
+module_param(xsvnic_highdma, int, 0644);
+
+int xsvnic_vlanaccel;
+module_param(xsvnic_vlanaccel, int, 0644);
+
+int xsvnic_rxbatching = 1;
+module_param(xsvnic_rxbatching, int, 0644);
+
+int xsvnic_report_10gbps;
+module_param(xsvnic_report_10gbps, int, 0644);
+
+int xsvnic_reclaim_count = XSVNIC_RECLAIM_COUNT;
+module_param(xsvnic_reclaim_count, int, 0644);
+
+int xsvnic_tx_queue_len = 1000;
+module_param(xsvnic_tx_queue_len, int, 0644);
+
+int xsvnic_tx_intr_mode;
+module_param(xsvnic_tx_intr_mode, int, 0644);
+
+int xsvnic_max_coal_frames;
+module_param(xsvnic_max_coal_frames, int, 0644);
+
+int xsvnic_coal_usecs = 100;
+module_param(xsvnic_coal_usecs, int, 0644);
+
+int xsvnic_rx_intr_mode;
+module_param(xsvnic_rx_intr_mode, int, 0644);
+
+int xsvnic_wait_in_boot = 1;
+module_param(xsvnic_wait_in_boot, int, 0644);
+
+int xsvnic_wait_per_vnic = 30;
+module_param(xsvnic_wait_per_vnic, int, 0644);
+
+unsigned long xsvnic_wait_time;
+static int xsvnic_xsmp_service_id = -1;
+struct list_head xsvnic_list;
+static spinlock_t xsvnic_lock;
+struct mutex xsvnic_mutex;
+static struct workqueue_struct *xsvnic_wq;
+static struct workqueue_struct *xsvnic_io_wq;
+u32 xsvnic_counters[XSVNIC_MAX_GLOB_COUNTERS];
+
+static void queue_sm_work(struct xsvnic *xsvnicp, int msecs);
+static void _xsvnic_set_multicast(struct xsvnic *xsvnicp);
+static void xsvnic_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+                                     int len);
+static int xsvnic_remove_vnic(struct xsvnic *xsvnicp);
+static void xsvnic_send_cmd_to_xsigod(struct xsvnic *xsvnicp, int cmd);
+static void xsvnic_reclaim_tx_buffers(struct xsvnic *xsvnicp);
+static void handle_ring_size_change(struct xsvnic *xsvnicp);
+static void handle_rxbatch_change(struct xsvnic *xsvnicp);
+static int xsvnic_start_xmit(struct sk_buff *skb, struct net_device *netdev);
+static void xsvnic_update_oper_state(struct xsvnic *xsvnicp);
+static void xsvnic_update_tca_info(struct xsvnic *xsvnicp,
+                                  struct xsvnic_xsmp_msg *xmsgp,
+                                  int set_oper_down);
+char *xsvnic_get_rxbat_pkts(struct xsvnic *xsvnicp, int *curr_seg_len,
+                           char *start, char *is_last_pkt, int total_pkt_len);
+
+static inline int xsvnic_esx_preregister_setup(struct net_device *netdev)
+{
+       return 0;
+}
+
+static inline int xsvnic_esx_postregister_setup(struct net_device *netdev)
+{
+       return 0;
+}
+
+static inline void vmk_notify_uplink(struct net_device *netdev)
+{
+}
+
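+/*
+ * Unpack an rx-batched page: walk the per-segment headers, build an
+ * skb per segment (header bytes copied linearly, payload attached as
+ * a page fragment), and hand each skb up via xsvnic_send_skb().
+ */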
+static inline void xsvnic_process_pages(struct xsvnic *xsvnicp,
+                                       struct xscore_buf_info *binfo)
+{
+       struct page *page;
+       struct sk_buff *skb;
+       int tot_pkt_len, hdr_len, curr_pkt_len, page_offset = 0;
+       char *start, *copy_start;
+       char nr_segs = 0, is_last_seg = 1;
+
+       tot_pkt_len = binfo->sz;
+       page = binfo->cookie;
+       start = page_address(page) + page_offset;
+
+       do {
+               curr_pkt_len = 0;
+               copy_start = xsvnic_get_rxbat_pkts(xsvnicp, &curr_pkt_len,
+                                                  start, &is_last_seg,
+                                                  tot_pkt_len);
+
+               hdr_len = min((int)(XSVNIC_MIN_PACKET_LEN), curr_pkt_len);
+               skb = dev_alloc_skb(hdr_len + NET_IP_ALIGN);
+               if (!skb) {
+                       pr_err("XSVNIC: %s unable to allocate skb\n", __func__);
+                       put_page(page);
+                       break;
+               }
+               skb_reserve(skb, NET_IP_ALIGN);
+               memcpy(skb->data, copy_start, hdr_len);
+
+               skb_fill_page_desc(skb, 0, page,
+                                  page_offset + hdr_len + XS_RXBAT_HDRLEN,
+                                  curr_pkt_len - hdr_len);
+
+               skb->data_len = curr_pkt_len - hdr_len;
+               skb->len += curr_pkt_len;
+               skb->tail += hdr_len;
+
+               if (!is_last_seg) {
+                       start = copy_start + curr_pkt_len;
+                       page_offset += XS_RXBAT_HDRLEN + curr_pkt_len +
+                           xsvnic_align_addr(&start);
+                       get_page(page);
+               }
+
+               xsvnic_send_skb(xsvnicp, skb, curr_pkt_len, 0);
+               nr_segs++;
+       } while (!is_last_seg);
+
+       xsvnic_count_segs(xsvnicp, nr_segs, tot_pkt_len);
+}
+
+static inline void xsvnic_dev_kfree_skb_any(struct sk_buff *skb)
+{
+       if (skb != NULL)
+               dev_kfree_skb_any(skb);
+       else
+               pr_err("%s Error skb is null\n", __func__);
+}
+
+/*
+ * All XSMP related protocol messages
+ */
+
+static void xsvnic_put_ctx(struct xsvnic *xsvnicp)
+{
+       atomic_dec(&xsvnicp->ref_cnt);
+}
+
+static int xsvnic_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, void *data, int length)
+{
+       struct xsmp_message_header *m_header = data;
+       int ret;
+
+       m_header->length = cpu_to_be16(m_header->length);
+       ret = xcpm_send_message(xsmp_hndl, xsvnic_xsmp_service_id, data,
+                               length);
+       if (ret)
+               xcpm_free_msg(data);
+       return ret;
+}
+
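+/*
+ * Ownership note: xsvnic_xsmp_send_msg() byte-swaps the header length
+ * in place and frees the buffer itself on failure, so callers must
+ * not touch a message after handing it to the send path.
+ */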
+static int xsvnic_xsmp_send_ack(xsmp_cookie_t xsmp_hndl,
+                               struct xsvnic_xsmp_msg *xmsgp)
+{
+       void *msg;
+       struct xsmp_message_header *m_header;
+       int total_len = sizeof(*xmsgp) + sizeof(*m_header);
+
+       msg = xcpm_alloc_msg(total_len);
+       if (!msg)
+               return -ENOMEM;
+       m_header = (struct xsmp_message_header *)msg;
+       m_header->type = XSMP_MESSAGE_TYPE_VNIC;
+       m_header->length = total_len;
+
+       xmsgp->code = 0;
+
+       memcpy(msg + sizeof(*m_header), xmsgp, sizeof(*xmsgp));
+
+       return xsvnic_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+static int xsvnic_xsmp_send_nack(xsmp_cookie_t xsmp_hndl, void *data,
+                                int length, u8 code)
+{
+       void *msg;
+       struct xsmp_message_header *m_header;
+       int total_len = length + sizeof(struct xsmp_message_header);
+       struct xsvnic_xsmp_msg *xsmsgp = (struct xsvnic_xsmp_msg *)data;
+
+       msg = xcpm_alloc_msg(total_len);
+       if (!msg)
+               return -ENOMEM;
+       m_header = (struct xsmp_message_header *)msg;
+       m_header->type = XSMP_MESSAGE_TYPE_VNIC;
+       m_header->length = total_len;
+
+       xsmsgp->code = XSMP_XSVNIC_NACK | code;
+       memcpy(msg + sizeof(*m_header), data, length);
+       return xsvnic_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+static int xsvnic_xsmp_send_notification(xsmp_cookie_t xsmp_hndl, u64 vid,
+                                        int notifycmd)
+{
+       int length = sizeof(struct xsmp_message_header) +
+           sizeof(struct xsvnic_xsmp_msg);
+       void *msg;
+       struct xsmp_message_header *header;
+       struct xsvnic_xsmp_msg *xsmp_msg;
+
+       msg = xcpm_alloc_msg(length);
+       if (!msg)
+               return -ENOMEM;
+
+       memset(msg, 0, length);
+
+       header = (struct xsmp_message_header *)msg;
+       xsmp_msg = (struct xsvnic_xsmp_msg *)(msg + sizeof(*header));
+
+       header->type = XSMP_MESSAGE_TYPE_VNIC;
+       header->length = length;
+
+       xsmp_msg->type = notifycmd;
+       xsmp_msg->length = cpu_to_be16(sizeof(*xsmp_msg));
+       xsmp_msg->resource_id = cpu_to_be64(vid);
+
+       return xsvnic_xsmp_send_msg(xsmp_hndl, msg, length);
+}
+
+static int xsvnic_xsmp_send_ha_state(struct xsvnic *xsvnicp, int ha_state)
+{
+       struct xsmp_message_header *header;
+       void *msg;
+       struct xsvnic_ha_info_msg *ha_info_msgp;
+       int length = sizeof(struct xsmp_message_header) +
+           sizeof(struct xsvnic_ha_info_msg);
+
+       msg = xcpm_alloc_msg(length);
+       if (!msg)
+               return -ENOMEM;
+
+       memset(msg, 0, length);
+       header = (struct xsmp_message_header *)msg;
+       header->type = XSMP_MESSAGE_TYPE_VNIC;
+       header->length = length;
+       ha_info_msgp = msg + sizeof(struct xsmp_message_header);
+       ha_info_msgp->type = XSMP_XSVNIC_HA_INFO;
+       ha_info_msgp->length = cpu_to_be16(sizeof(*ha_info_msgp));
+       ha_info_msgp->resource_id = cpu_to_be64(xsvnicp->resource_id);
+       ha_info_msgp->ha_state = ha_state;
+       return xsvnic_xsmp_send_msg(xsvnicp->xsmp_hndl, msg, length);
+}
+
+static int xsvnic_xsmp_send_oper_state(struct xsvnic *xsvnicp,
+                                      u64 vid, int state)
+{
+       int ret;
+       xsmp_cookie_t xsmp_hndl = xsvnicp->xsmp_hndl;
+       char *str = state == XSMP_XSVNIC_OPER_UP ? "UP" : "DOWN";
+
+       ret = xsvnic_xsmp_send_notification(xsmp_hndl, vid, state);
+       switch (state) {
+       case XSMP_XSVNIC_OPER_UP:
+               xsvnicp->counters[XSVNIC_SENT_OPER_UP_COUNTER]++;
+               break;
+       case XSMP_XSVNIC_OPER_DOWN:
+               xsvnicp->counters[XSVNIC_SENT_OPER_DOWN_COUNTER]++;
+               break;
+       }
+       if (ret) {
+               xsvnicp->counters[XSVNIC_SENT_OPER_STATE_FAILURE_COUNTER]++;
+               XSMP_INFO("%s: Oper %s notification failed for resource_id: 0x%Lx\n",
+                         __func__, str, vid);
+       } else {
+               xsvnicp->counters[XSVNIC_SENT_OPER_STATE_SUCCESS_COUNTER]++;
+               XSMP_INFO("%s: Oper %s notification succeeded for resource_id: 0x%Lx\n",
+                         __func__, str, vid);
+       }
+
+       return ret;
+}
+
+/*
+ * Handle all IO path messaging here
+ * Called with mutex held
+ */
+static int xsvnic_send_start_stop(struct xsvnic *xsvnicp, int opcode)
+{
+       struct xsvnic_control_msg *header;
+       int len = sizeof(*header);
+       int ret;
+
+       if (xsvnicp->ctrl_conn.state != XSVNIC_CONN_CONNECTED)
+               return -ENOTCONN;
+       header = kmalloc(len, GFP_ATOMIC);
+       if (!header)
+               return -ENOMEM;
+
+       header->type = opcode;
+       /*
+        * Known bug: the length should be byte-swapped here, but the bug
+        * is on the xvnd side and is carried forward for backward
+        * compatibility (see xsvnic_send_multicast_list()).
+        */
+       header->length = sizeof(*header);
+       /*
+        * This is called with interrupts enabled
+        */
+       ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, header, len, 0);
+       if (ret)
+               kfree(header);
+       if (opcode == XSVNIC_START_RX)
+               xsvnicp->counters[XSVNIC_START_RX_COUNTER]++;
+       else
+               xsvnicp->counters[XSVNIC_STOP_RX_COUNTER]++;
+       return ret;
+}
+
+static int xsvnic_send_vlan_list(struct xsvnic *xsvnicp, u16 *vlanp, int count,
+                                int opcode)
+{
+       u8 *msg;
+       struct xsvnic_control_msg *header;
+       u16 *vp;
+       int len, i;
+       int ret;
+
+       if (xsvnicp->ctrl_conn.state != XSVNIC_CONN_CONNECTED)
+               return -ENOTCONN;
+       len = sizeof(*header) + (count * sizeof(u16));
+       msg = kmalloc(len, GFP_ATOMIC);
+       if (!msg)
+               return -ENOMEM;
+       vp = (u16 *) (msg + sizeof(*header));
+       for (i = 0; i < count; i++)
+               *vp++ = cpu_to_be16(*vlanp++);
+       header = (struct xsvnic_control_msg *)msg;
+       header->type = opcode;
+       header->length = cpu_to_be16(len);
+       ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, msg, len,
+                              XSCORE_DEFER_PROCESS);
+       if (ret)
+               kfree(msg);
+       return ret;
+}
+
+static int xsvnic_send_allvlan_list(struct xsvnic *xsvnicp)
+{
+       int count = xsvnicp->vlan_count;
+       u16 *vlan_listp, *vp;
+       struct vlan_entry *vlan;
+       int ret;
+
+       if (count == 0)
+               return 0;
+
+       vlan_listp = kmalloc_array(count, sizeof(u16), GFP_ATOMIC);
+       if (!vlan_listp)
+               return -ENOMEM;
+       vp = vlan_listp;
+       list_for_each_entry(vlan, &xsvnicp->vlan_list, vlan_list)
+               *vp++ = vlan->vlan_id;
+       ret = xsvnic_send_vlan_list(xsvnicp, vlan_listp, count,
+                                   XSVNIC_ASSIGN_VLAN);
+       kfree(vlan_listp);
+       return ret;
+}
+
+/*
+ * Called with spin lock held
+ */
+
+static int xsvnic_send_multicast_list(struct xsvnic *xsvnicp, u8 *msg, int len,
+                                     int promisc)
+{
+       int ret;
+       struct xsvnic_control_msg *header;
+
+       header = (struct xsvnic_control_msg *)msg;
+       header->type = XSVNIC_MULTICAST_LIST_SEND;
+       /*
+        * This is a bug: the length needs byte-swapping. Unfortunately
+        * the bug is in the xvnd code, so we must carry it forward for
+        * backward compatibility.
+        */
+       header->length = len;
+       header->data = promisc;
+       clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+       ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, msg, len,
+                              XSCORE_DEFER_PROCESS);
+       if (ret) {
+               kfree(msg);
+               return ret;
+       } else
+               set_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+       return 0;
+}
+
+static void handle_port_link_change(struct xsvnic *xsvnicp, int linkup)
+{
+       if (linkup) {
+               set_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+               netif_carrier_on(xsvnicp->netdev);
+               netif_wake_queue(xsvnicp->netdev);
+       } else {
+               clear_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+               netif_carrier_off(xsvnicp->netdev);
+               netif_stop_queue(xsvnicp->netdev);
+       }
+}
+
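+/*
+ * Maps the speed code carried in the control messages to Mbps; e.g.
+ * code 2 is 10 Mbps and code 15 is 10 Gbps. Out-of-range codes
+ * default to 1000 Mbps.
+ */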
+static int speed_arr[] = { 0, 100, 10, 20, 500, 800, 1000, 2000, 3000, 4000,
+       5000, 6000, 7000, 8000, 9000, 10000
+};
+
+static int xsvnic_convert_speed(int sp)
+{
+       if (sp < 0 || sp >= ARRAY_SIZE(speed_arr))
+               return 1000;
+       return speed_arr[sp];
+}
+
+static void handle_vnic_control_msgs(struct work_struct *work)
+{
+       struct xsvnic_work *xwork = container_of(work, struct xsvnic_work,
+                                                work);
+       struct xsvnic *xsvnicp = xwork->xsvnicp;
+       struct xsvnic_control_msg *header =
+           (struct xsvnic_control_msg *)xwork->msg;
+       struct xsvnic_start_rx_resp_msg *resp;
+       struct xsvnic_link_up_msg *linkp;
+       unsigned long flags;
+
+       switch (header->type) {
+       case XSVNIC_START_RX_RESPONSE:
+               IOCTRL_INFO("VNIC: %s Start Rx Response\n", xsvnicp->vnic_name);
+               resp = (struct xsvnic_start_rx_resp_msg *)&header->data;
+               if (test_bit(XSVNIC_START_RX_SENT, &xsvnicp->state) &&
+                   !test_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state)) {
+                       xsvnicp->counters[XSVNIC_START_RX_RESP_COUNTER]++;
+                       set_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+                       xsvnicp->port_speed =
+                           xsvnic_convert_speed(resp->port_speed);
+                       xsvnicp->jiffies = jiffies;
+                       pr_info("XSVNIC: %s Port Speed %d Mbps\n",
+                               xsvnicp->vnic_name, xsvnicp->port_speed);
+                       /*
+                        * The port is up now; enable the carrier state
+                        */
+                       if (test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state))
+                               handle_port_link_change(xsvnicp, 1);
+                       complete(&xsvnicp->done);
+               } else
+                       xsvnicp->counters[XSVNIC_BAD_RX_RESP_COUNTER]++;
+               break;
+       case XSVNIC_LINK_UP:
+               if (!test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state)) {
+                       linkp = (struct xsvnic_link_up_msg *)&header->data;
+                       xsvnicp->port_speed =
+                           xsvnic_convert_speed(linkp->port_speed);
+                       handle_port_link_change(xsvnicp, 1);
+                       xsvnicp->counters[XSVNIC_PORT_LINK_UP_COUNTER]++;
+                       pr_info("XSVNIC: %s Link Up, speed: %d Mbps\n",
+                               xsvnicp->vnic_name, xsvnicp->port_speed);
+               } else {
+                       xsvnicp->counters[XSVNIC_DUP_PORT_LINK_UP_COUNTER]++;
+                       IOCTRL_INFO("VNIC: %s Duplicate Link Up message\n",
+                                   xsvnicp->vnic_name);
+               }
+               break;
+       case XSVNIC_LINK_DOWN:
+               if (test_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state)) {
+                       handle_port_link_change(xsvnicp, 0);
+                       xsvnicp->counters[XSVNIC_PORT_LINK_DOWN_COUNTER]++;
+                       pr_info("XSVNIC: %s Link Down (Eth)\n",
+                               xsvnicp->vnic_name);
+               } else {
+                       xsvnicp->counters[XSVNIC_DUP_PORT_LINK_DOWN_COUNTER]++;
+                       IOCTRL_INFO("VNIC: %s Duplicate Link Down message\n",
+                                   xsvnicp->vnic_name);
+               }
+               break;
+       case XSVNIC_MULTICAST_LIST_RESPONSE:
+               spin_lock_irqsave(&xsvnicp->lock, flags);
+               clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+               xsvnicp->counters[XSVNIC_MCAST_LIST_RESP_COUNTER]++;
+               if (test_and_clear_bit(XSVNIC_MCAST_LIST_PENDING,
+                                      &xsvnicp->state))
+                       _xsvnic_set_multicast(xsvnicp);
+               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+               break;
+       default:
+               IOCTRL_ERROR("VNIC: %s Unknown message type %d\n",
+                            xsvnicp->vnic_name, header->type);
+               break;
+       }
+       kfree(xwork->msg);
+       kfree(xwork);
+       xsvnic_put_ctx(xsvnicp);
+}
+
+static void xsvnic_set_oper_down(struct xsvnic *xsvnicp, int lock)
+{
+       unsigned long flags = 0;
+
+       if (lock)
+               spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (test_and_clear_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+               netif_carrier_off(xsvnicp->netdev);
+               netif_stop_queue(xsvnicp->netdev);
+               clear_bit(XSVNIC_START_RX_SENT, &xsvnicp->state);
+               clear_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+               clear_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+               clear_bit(XSVNIC_OPER_UP, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+               clear_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+               xsvnicp->ctrl_conn.state = XSVNIC_CONN_ERROR;
+               xsvnicp->data_conn.state = XSVNIC_CONN_ERROR;
+               xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+                                           XSMP_XSVNIC_OPER_DOWN);
+               xsvnicp->ha_state = XSVNIC_HA_STATE_UNKNOWN;
+       }
+       if (lock)
+               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static void xsvnic_ctrl_send_handler(void *client_arg, void *msg, int status,
+                                    int n)
+{
+       struct xsvnic *xsvnicp = client_arg;
+
+       IOCTRL_INFO("%s:Send Status %d, vnic: %s\n", __func__, status,
+                   xsvnicp->vnic_name);
+       if (status) {
+               IOCTRL_ERROR("VNIC: %s Ctrl Send Completion error: %d\n",
+                            xsvnicp->vnic_name, status);
+               xsvnicp->counters[XSVNIC_QP_ERROR_COUNTER]++;
+               xsvnic_set_oper_down(xsvnicp, 1);
+       }
+       kfree(msg);
+}
+
+/*
+ * Called from interrupt context
+ */
+static void xsvnic_ctrl_recv_handler(void *client_arg, void *msg, int sz,
+                                    int status, int n)
+{
+       struct xsvnic *xsvnicp = client_arg;
+       struct xsvnic_work *work;
+       unsigned long flags;
+
+       if (status) {
+               IOCTRL_ERROR("%s: Recv Completion error: status %d\n",
+                            xsvnicp->vnic_name, status);
+               xsvnicp->counters[XSVNIC_CTRL_RECV_ERR_COUNTER]++;
+               xsvnic_set_oper_down(xsvnicp, 1);
+               kfree(msg);
+               return;
+       }
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work) {
+               kfree(msg);
+               return;
+       }
+       INIT_WORK(&work->work, handle_vnic_control_msgs);
+       work->xsvnicp = xsvnicp;
+       work->msg = msg;
+       work->len = sz;
+       work->status = status;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (!test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+               atomic_inc(&xsvnicp->ref_cnt);
+               queue_work(xsvnic_io_wq, &work->work);
+       } else {
+               kfree(msg);
+               kfree(work);
+       }
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+/*
+ * Data is pending, in interrupt context
+ */
+static void xsvnic_data_recv_handler(void *client_arg)
+{
+       struct xsvnic *xsvnicp = client_arg;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) &&
+           test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) &&
+           test_bit(XSVNIC_OPER_UP, &xsvnicp->state) &&
+           !test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+               xsvnicp->counters[XSVNIC_NAPI_SCHED_COUNTER]++;
+               clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+               napi_schedule(&xsvnicp->napi);
+       } else
+               xsvnicp->counters[XSVNIC_NAPI_NOTSCHED_COUNTER]++;
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static inline void xsvnic_conn_disconnect(struct xsvnic *xsvnicp,
+                                         struct xsvnic_conn *conn)
+{
+       conn->state = XSVNIC_CONN_DISCONNECTED;
+       /*
+        * Whenever we call xscore_conn_disconnect,
+        * make sure there are no mutexes held
+        */
+       mutex_unlock(&xsvnicp->mutex);
+       xscore_conn_disconnect(&conn->ctx, 0);
+       mutex_lock(&xsvnicp->mutex);
+}
+
+static void xsvnic_io_disconnect(struct xsvnic *xsvnicp)
+{
+       xsvnic_set_oper_down(xsvnicp, 1);
+       if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state))
+               napi_synchronize(&xsvnicp->napi);
+       xsvnic_conn_disconnect(xsvnicp, &xsvnicp->ctrl_conn);
+       xsvnic_conn_disconnect(xsvnicp, &xsvnicp->data_conn);
+       if (test_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state))
+               handle_ring_size_change(xsvnicp);
+       if (test_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state))
+               handle_rxbatch_change(xsvnicp);
+}
+
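+/*
+ * Data-path heartbeat: build a minimum-length, self-addressed RARP
+ * reply and push it through the normal transmit path. Presumably this
+ * both exercises the data QP and refreshes MAC learning on the
+ * gateway.
+ */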
+static int xsvnic_send_data_hbeat(struct xsvnic *xsvnicp)
+{
+       struct sk_buff *skb;
+       struct arphdr *arp;
+       unsigned char *arp_ptr, *eth_ptr;
+       int ret;
+
+       skb = alloc_skb(XSVNIC_MIN_PACKET_LEN, GFP_ATOMIC);
+       if (skb == NULL)
+               return -ENOMEM;
+
+       eth_ptr = (unsigned char *)skb_put(skb, XSVNIC_MIN_PACKET_LEN);
+       ether_addr_copy(eth_ptr, xsvnicp->netdev->dev_addr);
+       eth_ptr += ETH_ALEN;
+       ether_addr_copy(eth_ptr, xsvnicp->netdev->dev_addr);
+       eth_ptr += ETH_ALEN;
+       *eth_ptr++ = (ETH_P_RARP >> 8) & 0xff;
+       *eth_ptr++ = ETH_P_RARP & 0xff;
+
+       arp = (struct arphdr *)eth_ptr;
+       arp->ar_hrd = htons(xsvnicp->netdev->type);
+       arp->ar_hln = xsvnicp->netdev->addr_len;
+       arp->ar_pln = 4;
+       arp->ar_op = htons(ARPOP_RREPLY);
+
+       arp_ptr = (unsigned char *)(arp + 1);
+
+       ether_addr_copy(arp_ptr, xsvnicp->netdev->dev_addr);
+       arp_ptr += xsvnicp->netdev->addr_len;
+       arp_ptr += 4;
+       ether_addr_copy(arp_ptr, xsvnicp->netdev->dev_addr);
+
+       skb_reset_network_header(skb);
+       skb->dev = xsvnicp->netdev;
+       skb->protocol = htons(ETH_P_RARP);
+
+       ret = xsvnic_start_xmit(skb, xsvnicp->netdev);
+       if (ret)
+               dev_kfree_skb_any(skb);
+
+       return 0;
+}
+
+static int xsvnic_send_ctrl_hbeat(struct xsvnic *xsvnicp)
+{
+       struct xsmp_message_header *header;
+       int ret;
+
+       header = kmalloc(sizeof(*header), GFP_ATOMIC);
+       if (!header)
+               return -ENOMEM;
+       header->type = XSVNIC_HEART_BEAT;
+       header->length = sizeof(*header);
+       ret = xscore_post_send(&xsvnicp->ctrl_conn.ctx, header,
+                              sizeof(*header), 0);
+       if (ret)
+               kfree(header);
+       return ret;
+}
+
+/*
+ * Send heartbeat over control channel or data channel
+ */
+static int xsvnic_send_hbeat(struct xsvnic *xsvnicp)
+{
+       int ret = 0;
+
+       if (!xsvnic_hbeat_enable)
+               return 0;
+       if (xsvnic_hbeat_enable == 1) {
+               ret = xsvnic_send_ctrl_hbeat(xsvnicp);
+               xsvnicp->counters[XSVNIC_CTRL_HBEAT_COUNTER]++;
+       } else {
+               xsvnic_send_data_hbeat(xsvnicp);
+               xsvnicp->counters[XSVNIC_DATA_HBEAT_COUNTER]++;
+       }
+       return ret;
+}
+
+static void handle_ha_sm(struct xsvnic *xsvnicp)
+{
+       if ((xsvnicp->mp_flag & (MP_XSVNIC_PRIMARY |
+               MP_XSVNIC_SECONDARY)) == 0) {
+               xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+               return;
+       }
+       /*
+        * Check HA state and send update if things have changed
+        */
+       if (xsvnicp->ha_state == XSVNIC_HA_STATE_UNKNOWN) {
+               xsvnicp->ha_state = test_bit(XSVNIC_STATE_STDBY,
+                                            &xsvnicp->state)
+                   ? XSVNIC_HA_STATE_STANDBY : XSVNIC_HA_STATE_ACTIVE;
+               xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+       } else if (xsvnicp->ha_state == XSVNIC_HA_STATE_ACTIVE &&
+                  (test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+               xsvnicp->ha_state = XSVNIC_HA_STATE_STANDBY;
+               xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+       } else if (xsvnicp->ha_state == XSVNIC_HA_STATE_STANDBY &&
+                  (!test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+               xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+               xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+       }
+}
+
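+/*
+ * Runs from the periodic state machine: if send_hbeat_flag is still
+ * set from the previous pass (nothing else cleared it in between), TX
+ * buffers are reclaimed and a heartbeat is sent; a heartbeat failure
+ * forces the vnic operationally down. The flag is re-armed on every
+ * pass.
+ */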
+static void handle_hbeat_sm(struct xsvnic *xsvnicp)
+{
+       unsigned long flags;
+       /*
+        * Send heartbeat if send_hbeat_flag is set
+        */
+       if (xsvnicp->send_hbeat_flag) {
+               spin_lock_irqsave(&xsvnicp->lock, flags);
+               xsvnic_reclaim_tx_buffers(xsvnicp);
+               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+               if (xsvnicp->ha_state == XSVNIC_HA_STATE_ACTIVE
+                   && xsvnic_send_hbeat(xsvnicp)) {
+                       xsvnicp->counters[XSVNIC_HBEAT_ERR_COUNTER]++;
+                       xsvnic_set_oper_down(xsvnicp, 1);
+               }
+       }
+       xsvnicp->send_hbeat_flag = 1;
+}
+
+static void handle_ring_size_change(struct xsvnic *xsvnicp)
+{
+       int ret;
+
+       clear_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state);
+       /*
+        * Now destroy ctx
+        */
+       xscore_conn_destroy(&xsvnicp->data_conn.ctx);
+       xsvnicp->data_conn.ctx.rx_ring_size = xsvnicp->rx_ring_size;
+       xsvnicp->data_conn.ctx.tx_ring_size = xsvnicp->tx_ring_size;
+
+       ret = xscore_conn_init(&xsvnicp->data_conn.ctx,
+                              xsvnicp->xsmp_info.port);
+       if (ret)
+               DRV_ERROR("xscore_conn_init data error for VNIC %s, ret = %d\n",
+                         xsvnicp->vnic_name, ret);
+}
+
+static void handle_multicast(struct xsvnic *xsvnicp)
+{
+       unsigned long flags;
+
+       if (test_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state)) {
+               if (test_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state)) {
+                       spin_lock_irqsave(&xsvnicp->lock, flags);
+                       xsvnicp->counters[XSVNIC_MCAST_LIST_NORESP_COUNTER]++;
+                       clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+                       if (test_and_clear_bit
+                           (XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state))
+                               _xsvnic_set_multicast(xsvnicp);
+                       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+               } else
+                       set_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+       }
+}
+
+static void handle_action_flags(struct xsvnic *xsvnicp)
+{
+       if (test_bit(XSVNIC_TRIGGER_NAPI_SCHED, &xsvnicp->state)) {
+               xsvnic_data_recv_handler(xsvnicp);
+               clear_bit(XSVNIC_TRIGGER_NAPI_SCHED, &xsvnicp->state);
+       }
+}
+
+static void handle_post_conn_setup(struct xsvnic *xsvnicp)
+{
+       int ret;
+       unsigned long flags;
+
+       xsvnicp->counters[XSVNIC_IB_RECOVERED_COUNTER]++;
+       xsvnicp->send_hbeat_flag = 0;
+       set_bit(XSVNIC_OPER_UP, &xsvnicp->state);
+       xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+                                   XSMP_XSVNIC_OPER_UP);
+       /*
+        * Now send multicast list & vlan list
+        */
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       _xsvnic_set_multicast(xsvnicp);
+       xsvnic_send_allvlan_list(xsvnicp);
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       init_completion(&xsvnicp->done);
+       set_bit(XSVNIC_START_RX_SENT, &xsvnicp->state);
+       clear_bit(XSVNIC_START_RESP_RCVD, &xsvnicp->state);
+       ret = xsvnic_send_start_stop(xsvnicp, XSVNIC_START_RX);
+       if (ret || !wait_for_completion_timeout(&xsvnicp->done,
+                                               msecs_to_jiffies(1000 * 5))) {
+               IOCTRL_ERROR("%s: start send failed %d or did not get rx start resp\n",
+                            xsvnicp->vnic_name, ret);
+               xsvnic_set_oper_down(xsvnicp, 1);
+       } else {
+               napi_schedule(&xsvnicp->napi);
+               if (xsvnicp->mp_flag &
+                   (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY))
+                       xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+       }
+}
+
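+/*
+ * Connection bring-up order: the control QP is connected first; only
+ * once it reaches CONNECTED is the data QP connected, and only when
+ * both are up does handle_post_conn_setup() send the multicast/vlan
+ * lists and XSVNIC_START_RX. An error on either connection tears both
+ * down via xsvnic_io_disconnect().
+ */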
+static void xsvnic_conn_state_machine(struct xsvnic *xsvnicp)
+{
+       struct xsvnic_conn *cconn = &xsvnicp->ctrl_conn;
+       struct xsvnic_conn *dconn = &xsvnicp->data_conn;
+       int ret;
+
+       switch (cconn->state) {
+       case XSVNIC_CONN_ERROR:
+               xsvnic_io_disconnect(xsvnicp);
+               break;
+       case XSVNIC_CONN_DISCONNECTED:
+       case XSVNIC_CONN_INIT:
+               xsvnicp->counters[XSVNIC_IB_RECOVERY_COUNTER]++;
+               set_bit(XSVNIC_PORT_LINK_UP, &xsvnicp->state);
+               clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+               clear_bit(XSVNIC_MCAST_LIST_TIMEOUT, &xsvnicp->state);
+               cconn->state = XSVNIC_CONN_CONNECTING;
+               ret = xscore_conn_connect(&cconn->ctx, 0);
+               if (ret)
+                       cconn->state = XSVNIC_CONN_ERROR;
+               break;
+       case XSVNIC_CONN_CONNECTED:
+               switch (dconn->state) {
+               case XSVNIC_CONN_ERROR:
+                       xsvnic_io_disconnect(xsvnicp);
+                       break;
+               case XSVNIC_CONN_DISCONNECTED:
+               case XSVNIC_CONN_INIT:
+                       dconn->state = XSVNIC_CONN_CONNECTING;
+                       ret = xscore_conn_connect(&dconn->ctx, 0);
+                       if (ret) {
+                               dconn->state = XSVNIC_CONN_ERROR;
+                               cconn->state = XSVNIC_CONN_ERROR;
+                       }
+                       break;
+               case XSVNIC_CONN_CONNECTED:
+                       handle_post_conn_setup(xsvnicp);
+                       break;
+               default:
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+}
+
+/*
+ * This function can get called from workqueue/thread context
+ */
+static int xsvnic_state_machine(struct xsvnic *xsvnicp)
+{
+       if (!test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) ||
+           !test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) ||
+           test_bit(XSVNIC_XT_DOWN, &xsvnicp->state) ||
+           test_bit(XSVNIC_IBLINK_DOWN, &xsvnicp->state) ||
+           test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+               xsvnic_io_disconnect(xsvnicp);
+               if (test_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state)) {
+                       clear_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+                       xsvnic_xsmp_send_notification(xsvnicp->xsmp_hndl,
+                                                     xsvnicp->resource_id,
+                                                     XSMP_XSVNIC_UPDATE);
+               }
+               if (test_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP, &xsvnicp->state))
+                       set_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+               else
+                       clear_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+               xsvnicp->sm_delay = 2000;
+               handle_ha_sm(xsvnicp);
+               return 0;
+       }
+       /*
+        * If it is operationally up, we are done with it
+        */
+       if (test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+               xsvnicp->counters[XSVNIC_OPER_UP_STATE_COUNTER]++;
+               handle_hbeat_sm(xsvnicp);
+               handle_ha_sm(xsvnicp);
+               handle_multicast(xsvnicp);
+               handle_action_flags(xsvnicp);
+               if (test_bit(XSVNIC_RX_NOBUF, &xsvnicp->state)) {
+                       if (!xscore_refill_recv
+                           (&xsvnicp->data_conn.ctx, GFP_KERNEL))
+                               clear_bit(XSVNIC_RX_NOBUF, &xsvnicp->state);
+                       else
+                               xsvnicp->counters[XSVNIC_RX_NOBUF_COUNTER]++;
+               }
+               xsvnicp->sm_delay = 2000;
+               return 0;
+       }
+       xsvnic_conn_state_machine(xsvnicp);
+       xsvnicp->sm_delay = 1000;
+       return 0;
+}
+
+static void xsvnic_state_machine_work(struct work_struct *work)
+{
+       struct xsvnic *xsvnicp = container_of(work, struct xsvnic,
+                                             sm_work.work);
+
+       mutex_lock(&xsvnicp->mutex);
+       xsvnic_state_machine(xsvnicp);
+       mutex_unlock(&xsvnicp->mutex);
+       queue_sm_work(xsvnicp, xsvnicp->sm_delay);
+}
+
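+/*
+ * Re-arms the delayed state-machine work unless the vnic is being
+ * deleted, in which case the actual teardown happens here, once the
+ * work item is guaranteed not to be rescheduled.
+ */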
+static void queue_sm_work(struct xsvnic *xsvnicp, int msecs)
+{
+       unsigned long flags;
+       int del = 0;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (!test_bit(XSVNIC_DELETING, &xsvnicp->state))
+               queue_delayed_work(xsvnic_wq, &xsvnicp->sm_work,
+                                  msecs_to_jiffies(msecs));
+       else
+               del = 1;
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       if (del)
+               xsvnic_remove_vnic(xsvnicp);
+}
+
+static void xsvnic_ctrl_event_handler(void *client_arg, int event)
+{
+       struct xsvnic *xsvnicp = client_arg;
+
+       mutex_lock(&xsvnicp->mutex);
+       switch (event) {
+       case XSCORE_CONN_CONNECTED:
+               xsvnicp->counters[XSVNIC_CTRL_CONN_OK_COUNTER]++;
+               xsvnicp->ctrl_conn.state = XSVNIC_CONN_CONNECTED;
+               break;
+       case XSCORE_CONN_ERR:
+               xsvnicp->counters[XSVNIC_CTRL_ERR_COUNTER]++;
+               xsvnicp->ctrl_conn.state = XSVNIC_CONN_ERROR;
+               break;
+       case XSCORE_CONN_RDISCONNECTED:
+               xsvnicp->counters[XSVNIC_CTRL_RDISC_COUNTER]++;
+               xsvnicp->ctrl_conn.state = XSVNIC_CONN_DISCONNECTED;
+               xsvnic_set_oper_down(xsvnicp, 1);
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&xsvnicp->mutex);
+}
+
+static void xsvnic_data_event_handler(void *client_arg, int event)
+{
+       struct xsvnic *xsvnicp = client_arg;
+
+       mutex_lock(&xsvnicp->mutex);
+       switch (event) {
+       case XSCORE_CONN_CONNECTED:
+               xsvnicp->counters[XSVNIC_DATA_CONN_OK_COUNTER]++;
+               xsvnicp->data_conn.state = XSVNIC_CONN_CONNECTED;
+               break;
+       case XSCORE_CONN_ERR:
+               xsvnicp->counters[XSVNIC_DATA_ERR_COUNTER]++;
+               xsvnicp->data_conn.state = XSVNIC_CONN_ERROR;
+               break;
+       case XSCORE_CONN_RDISCONNECTED:
+               xsvnicp->counters[XSVNIC_DATA_RDISC_COUNTER]++;
+               xsvnicp->data_conn.state = XSVNIC_CONN_DISCONNECTED;
+               xsvnic_set_oper_down(xsvnicp, 1);
+               break;
+       default:
+               break;
+       }
+       mutex_unlock(&xsvnicp->mutex);
+}
+
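+/*
+ * Rounds the requested size up to a whole power-of-two number of
+ * pages. __GFP_COMP is required for order > 0 so that the
+ * get_page()/put_page() references taken on rx-batching buffers
+ * treat the multi-page allocation as a single unit.
+ */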
+static struct page *xsvnic_alloc_pages(int *size, int *page_order)
+{
+       gfp_t alloc_flags = GFP_ATOMIC;
+       u16 order = get_order(*size);
+       int chan_size = (1 << order) * PAGE_SIZE;
+
+       *size = chan_size;
+       *page_order = order;
+
+       if (order > 0)
+               alloc_flags |= __GFP_COMP;
+
+       return alloc_pages(alloc_flags, order);
+}
+
+static u8 *xsvnic_skb_alloc(void *client_arg, void **cookie, int len)
+{
+       struct xsvnic *xsvnicp = client_arg;
+       struct sk_buff *skb;
+
+       skb = dev_alloc_skb(len);
+       if (!skb)
+               return NULL;
+
+       skb_reserve(skb, NET_IP_ALIGN);
+       skb->dev = xsvnicp->netdev;
+       *cookie = skb;
+       xsvnicp->counters[XSVNIC_RX_SKB_ALLOC_COUNTER]++;
+       return skb->data;
+}
+
+static struct page *xsvnic_page_alloc(void *client_arg, void **cookie,
+                                     int *rsize, int element)
+{
+       struct xsvnic *xsvnicp = client_arg;
+       struct page *page = xsvnic_alloc_pages(rsize, &xsvnicp->page_order);
+
+       if (!page) {
+               pr_info("XSVNIC: Unable to allocate page size %d\n", *rsize);
+               return NULL;
+       }
+
+       xsvnicp->counters[XSVNIC_RX_SKB_ALLOC_COUNTER]++;
+       *cookie = page;
+
+       return page;
+}
+
+static void xsvnic_page_free(void *client_arg, void *cookie, int dir)
+{
+       struct sk_buff *skb = NULL;
+       struct page *page = NULL;
+       struct xsvnic *xsvnicp = client_arg;
+
+       if (dir == XSCORE_SEND_BUF) {
+               skb = cookie;
+               xsvnic_dev_kfree_skb_any(skb);
+               xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER]++;
+       } else {
+               xsvnicp->counters[XSVNIC_RX_SKB_FREE_COUNTER]++;
+               page = cookie;
+               put_page(page);
+       }
+}
+
+static void xsvnic_skb_free(void *client_arg, void *cookie, int dir)
+{
+       struct sk_buff *skb = cookie;
+       struct xsvnic *xsvnicp = client_arg;
+
+       xsvnic_dev_kfree_skb_any(skb);
+       if (dir == XSCORE_SEND_BUF)
+               xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER]++;
+       else
+               xsvnicp->counters[XSVNIC_RX_SKB_FREE_COUNTER]++;
+}
+
+static inline void xsvnic_process_rbuf_error(struct xsvnic *xsvnicp,
+                                            struct xscore_buf_info *binfo)
+{
+       struct page *page;
+       struct sk_buff *skb;
+
+       if (xsvnicp->is_rxbatching) {
+               page = binfo->cookie;
+               put_page(page);
+       } else {
+               skb = binfo->cookie;
+               xsvnic_dev_kfree_skb_any(skb);
+       }
+}
+
+static u8 *xsvnic_ctrl_alloc(void *client_arg, void **cookie, int sz)
+{
+       return kmalloc(sz, GFP_ATOMIC);
+}
+
+static void xsvnic_ctrl_free(void *client_arg, void *cookie, int dir)
+{
+       kfree(cookie);
+}
+
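+/*
+ * Chooses the receive buffer strategy: with rx batching the transport
+ * fills two-page buffers via the page allocator above; otherwise each
+ * receive buffer is a plain skb sized for the MTU plus the Ethernet
+ * header and alignment overhead (the extra 12 bytes presumably cover
+ * the transport's per-packet header).
+ */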
+static void xsvnic_buf_init(struct xsvnic *xsvnicp,
+                           struct xscore_conn_ctx *cctx)
+{
+       if (xsvnicp->is_rxbatching) {
+               cctx->rx_buf_size = (PAGE_SIZE * 2);
+               cctx->alloc_page_bufs = xsvnic_page_alloc;
+               cctx->alloc_buf = NULL;
+               cctx->free_buf = xsvnic_page_free;
+       } else {
+               cctx->rx_buf_size = xsvnicp->mtu + NET_IP_ALIGN + ETH_HLEN + 12;
+               cctx->alloc_page_bufs = NULL;
+               cctx->alloc_buf = xsvnic_skb_alloc;
+               cctx->free_buf = xsvnic_skb_free;
+       }
+}
+
+int check_rxbatch_possible(struct xsvnic *xsvnicp, int flag)
+{
+       if (flag && (xsvnicp->install_flag & XSVNIC_INSTALL_RX_BAT)
+           && (xsvnicp->install_flag & XSVNIC_8K_IBMTU)
+           && (xsvnicp->mtu <= (PAGE_SIZE * 2)) && xsvnicp->xsmp_info.is_shca)
+               return 1;
+       else
+               return 0;
+}
+
+static void handle_rxbatch_change(struct xsvnic *xsvnicp)
+{
+       int ret;
+       struct xscore_conn_ctx *ctx = &xsvnicp->data_conn.ctx;
+       struct xt_cm_private_data *cmp =
+           (struct xt_cm_private_data *)ctx->priv_data;
+
+       clear_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state);
+       xscore_conn_destroy(ctx);
+
+       /*
+        * Change rx batching settings
+        */
+       xsvnicp->is_rxbatching = xsvnicp->is_rxbat_operational;
+       xsvnic_buf_init(xsvnicp, ctx);
+
+       if (xsvnicp->is_rxbatching) {
+               cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_BIT);
+               cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT);
+       } else {
+               cmp->data_qp_type &= ~(cpu_to_be32(XSVNIC_RXBAT_BIT));
+               cmp->data_qp_type &= ~(cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT));
+       }
+
+       ret = xscore_conn_init(ctx, xsvnicp->xsmp_info.port);
+       if (ret)
+               DRV_ERROR("xscore_conn_init data error for VNIC %s, ret = %d\n",
+                         xsvnicp->vnic_name, ret);
+}
+
+static int xsvnic_conn_init(struct xsvnic *xsvnicp)
+{
+       struct xsvnic_conn *cp;
+       struct xscore_conn_ctx *cctx;
+       struct xt_cm_private_data *cmp;
+       int ret;
+
+       cp = &xsvnicp->ctrl_conn;
+       cctx = &cp->ctx;
+       /*
+        * Control connection
+        */
+       cp->type = XSVNIC_IO_QP_TYPE_CONTROL;
+       cctx->tx_ring_size = 4;
+       cctx->rx_ring_size = 4;
+       cctx->rx_buf_size = XSVNIC_MAX_BUF_SIZE;
+       cctx->client_arg = xsvnicp;
+       cctx->alloc_buf = xsvnic_ctrl_alloc;
+       cctx->free_buf = xsvnic_ctrl_free;
+       cctx->send_compl_handler = xsvnic_ctrl_send_handler;
+       cctx->recv_msg_handler = xsvnic_ctrl_recv_handler;
+       cctx->event_handler = xsvnic_ctrl_event_handler;
+       cctx->dguid = xsvnicp->tca_guid;
+       cctx->dlid = xsvnicp->tca_lid;
+       cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+
+       cmp = (struct xt_cm_private_data *)cctx->priv_data;
+       cmp->vid = cpu_to_be64(xsvnicp->resource_id);
+       cmp->qp_type = cpu_to_be16(XSVNIC_IO_QP_TYPE_CONTROL);
+
+       cctx->priv_data_len = sizeof(*cmp);
+
+       ret = xscore_conn_init(cctx, xsvnicp->xsmp_info.port);
+       if (ret) {
+               DRV_ERROR("xscore_conn_init ctrl error for VID %llx %d\n",
+                         xsvnicp->resource_id, ret);
+               return ret;
+       }
+
+       cp = &xsvnicp->data_conn;
+       cctx = &cp->ctx;
+
+       cp->type = XSVNIC_IO_QP_TYPE_DATA;
+       cctx->tx_ring_size = xsvnicp->tx_ring_size;
+       cctx->rx_ring_size = xsvnicp->rx_ring_size;
+       cctx->client_arg = xsvnicp;
+
+       /*
+        * 8K IB MTU is for softhca only
+        */
+       if (xsvnicp->install_flag & XSVNIC_8K_IBMTU
+           && xsvnicp->xsmp_info.is_shca)
+               cctx->features |= XSCORE_8K_IBMTU_SUPPORT;
+
+       if (check_rxbatch_possible(xsvnicp, xsvnic_rxbatching))
+               xsvnicp->is_rxbatching = 1;
+
+       xsvnic_buf_init(xsvnicp, cctx);
+
+       cctx->send_compl_handler = NULL;
+       cctx->recv_compl_handler = xsvnic_data_recv_handler;
+       cctx->event_handler = xsvnic_data_event_handler;
+       cctx->dguid = xsvnicp->tca_guid;
+       cctx->dlid = xsvnicp->tca_lid;
+       cctx->service_id = be64_to_cpu(TCA_SERVICE_ID);
+       cctx->features |= XSCORE_SG_SUPPORT;
+       if (!xsvnic_tx_intr_mode) {
+               cctx->features |= XSCORE_NO_SEND_COMPL_INTR;
+       } else {
+               cctx->tx_max_coalesced_frames = xsvnic_max_coal_frames;
+               cctx->tx_coalesce_usecs = xsvnic_coal_usecs;
+       }
+
+       if (!xsvnic_rx_intr_mode) {
+               cctx->features |= XSCORE_NO_RECV_COMPL_INTR;
+       } else {
+               cctx->rx_max_coalesced_frames = xsvnic_max_coal_frames;
+               cctx->rx_coalesce_usecs = xsvnic_coal_usecs;
+       }
+
+       cmp = (struct xt_cm_private_data *)cctx->priv_data;
+       cmp->vid = cpu_to_be64(xsvnicp->resource_id);
+       cmp->qp_type = cpu_to_be16(XSVNIC_IO_QP_TYPE_DATA);
+
+       if (xsvnicp->is_tso && (xsvnicp->netdev->features & NETIF_F_TSO))
+               cmp->data_qp_type |= cpu_to_be32(XSVNIC_TSO_BIT);
+
+       if (xsvnicp->is_rxbatching) {
+               cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_BIT);
+               cmp->data_qp_type |= cpu_to_be32(XSVNIC_RXBAT_TIMER_BIT);
+       }
+
+       cctx->priv_data_len = sizeof(*cmp);
+
+       ret = xscore_conn_init(cctx, xsvnicp->xsmp_info.port);
+       if (ret) {
+               DRV_ERROR("xscore_conn_init data error for VID %llx %d\n",
+                         xsvnicp->resource_id, ret);
+               xscore_conn_destroy(&xsvnicp->ctrl_conn.ctx);
+       }
+       return ret;
+}
+
+/*
+ * All the functions related to the stack
+ */
+
+static void xsvnic_setup(struct net_device *netdev)
+{
+       ether_setup(netdev);
+}
+
+static int xsvnic_open(struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+       xsvnicp->counters[XSVNIC_OPEN_COUNTER]++;
+       mutex_lock(&xsvnicp->mutex);
+       napi_enable(&xsvnicp->napi);
+       set_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state);
+       mutex_unlock(&xsvnicp->mutex);
+       return 0;
+}
+
+static int xsvnic_stop(struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       unsigned long flags;
+
+#ifdef __VMKLNX__
+       /* set trans_start so we don't get spurious watchdogs during reset */
+       netdev->trans_start = jiffies;
+#endif
+
+       xsvnicp->counters[XSVNIC_STOP_COUNTER]++;
+       mutex_lock(&xsvnicp->mutex);
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       clear_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state);
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       xsvnic_io_disconnect(xsvnicp);
+       napi_disable(&xsvnicp->napi);
+       mutex_unlock(&xsvnicp->mutex);
+       return 0;
+}
+
+static struct net_device_stats *xsvnic_get_stats(struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+       xsvnicp->counters[XSVNIC_GETSTATS_COUNTER]++;
+       return &xsvnicp->stats;
+}
+
+static void xsvnic_tx_timeout(struct net_device *dev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(dev);
+
+       xsvnicp->counters[XSVNIC_WDOG_TIMEOUT_COUNTER]++;
+       xsvnic_set_oper_down(xsvnicp, 1);
+}
+
+static int xsvnic_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       return 0;
+}
+
+static int xsvnic_set_mac_address(struct net_device *dev, void *p)
+{
+       struct sockaddr *addr = p;
+       struct xsvnic *xsvnicp = netdev_priv(dev);
+
+       if (!is_valid_ether_addr((u8 *) (addr->sa_data)))
+               return -EINVAL;
+
+       if (memcmp(dev->dev_addr, addr->sa_data, dev->addr_len) != 0) {
+               memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+               xsvnicp->counters[XSVNIC_MAC_ADDR_CHNG]++;
+       }
+
+       return 0;
+}
+
+/*
+ * Copy all the Multicast addresses from src to the xsvnic device dst
+ */
+static int xsvnic_mc_list_copy(struct xsvnic *xsvnicp)
+{
+       struct net_device *netdev = xsvnicp->netdev;
+
+       kfree(xsvnicp->mc_addrs);
+
+       xsvnicp->mc_addrs = kmalloc_array(netdev_mc_count(netdev),
+                                         sizeof(struct ether_addr),
+                                         GFP_ATOMIC);
+
+       if (!xsvnicp->mc_addrs)
+               return -ENOMEM;
+       xsvnicp->mc_count = netdev_mc_count(netdev);
+       netdev_mc_list_copy(xsvnicp);
+       return 0;
+}
+
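+/*
+ * Only one multicast list may be in flight at a time: if a list has
+ * been sent and not yet acknowledged, the PENDING bit is set instead,
+ * and the response handler (or the timeout path in handle_multicast())
+ * re-issues the send. Promiscuous mode is requested when the list
+ * exceeds XSVNIC_MACLIST_MAX or IFF_PROMISC/IFF_ALLMULTI is set.
+ */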
+static void _xsvnic_set_multicast(struct xsvnic *xsvnicp)
+{
+       int count = xsvnicp->mc_count;
+       int i;
+       u8 *msg, *pay;
+       int tlen;
+
+       if (multicast_list_disable || xsvnicp->ctrl_conn.state
+           != XSVNIC_CONN_CONNECTED)
+               return;
+
+       if (test_bit(XSVNIC_MCAST_LIST_SENT, &xsvnicp->state)) {
+               /*
+                * Once response comes back for sent list, this will trigger
+                * another send operation
+                */
+               set_bit(XSVNIC_MCAST_LIST_PENDING, &xsvnicp->state);
+               return;
+       }
+
+       xsvnicp->counters[XSVNIC_SET_MCAST_COUNTER]++;
+       /*
+        * Copy over the multicast list and send it over
+        */
+       xsvnicp->iff_promisc = 0;
+       if ((xsvnicp->netdev->flags & (IFF_ALLMULTI | IFF_PROMISC))
+           || count > XSVNIC_MACLIST_MAX)
+               xsvnicp->iff_promisc = 1;
+       if (count > XSVNIC_MACLIST_MAX)
+               count = XSVNIC_MACLIST_MAX;
+       tlen = ETH_ALEN * count + sizeof(struct xsvnic_control_msg);
+       msg = kmalloc(tlen, GFP_ATOMIC);
+       if (!msg)
+               return;
+       pay = msg + sizeof(struct xsvnic_control_msg);
+       for (i = 0; i < count; i++) {
+               ether_addr_copy(pay, (u8 *)&(xsvnicp->mc_addrs[i]));
+               pay += ETH_ALEN;
+       }
+       xsvnic_send_multicast_list(xsvnicp, msg, tlen, xsvnicp->iff_promisc);
+}
+
+static void xsvnic_set_multicast(struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       xsvnic_mc_list_copy(xsvnicp);
+       _xsvnic_set_multicast(xsvnicp);
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static int xsvnic_vlan_rx_add_vlanid(struct net_device *netdev, __be16 proto,
+                               u16 vlanid)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       struct vlan_entry *vlan;
+       unsigned long flags;
+
+       xsvnicp->counters[XSVNIC_VLAN_RX_ADD_COUNTER]++;
+       /*
+        * The control message to the IOP can accommodate 1024 bytes, so
+        * we restrict the number of VLANs to 500. Ideally this limit
+        * would not be needed; it is kept for legacy reasons.
+        */
+       if (xsvnicp->vlan_count >= XSVNIC_VLANLIST_MAX)
+               return -1;
+       vlan = kmalloc(sizeof(struct vlan_entry), GFP_ATOMIC);
+       if (!vlan)
+               return -1;
+       INIT_LIST_HEAD(&vlan->vlan_list);
+       vlan->vlan_id = vlanid;
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       list_add_tail(&vlan->vlan_list, &xsvnicp->vlan_list);
+       xsvnicp->vlan_count++;
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       xsvnic_send_vlan_list(xsvnicp, &vlanid, 1, XSVNIC_ASSIGN_VLAN);
+       return 0;
+}
+
+static int xsvnic_vlan_rx_kill_vlanid(struct net_device *netdev, __be16 proto,
+                               u16 vlanid)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       struct vlan_entry *vlan;
+       unsigned long flags;
+
+       xsvnicp->counters[XSVNIC_VLAN_RX_DEL_COUNTER]++;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       list_for_each_entry(vlan, &xsvnicp->vlan_list, vlan_list) {
+               if (vlan->vlan_id == vlanid) {
+                       list_del(&vlan->vlan_list);
+                       kfree(vlan);
+                       xsvnicp->vlan_count--;
+                       xsvnic_send_vlan_list(xsvnicp, &vlanid, 1,
+                                             XSVNIC_UNASSIGN_VLAN);
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       return 0;
+}
+
+int xsvnic_change_rxbatch(struct xsvnic *xsvnicp, int flag)
+{
+       if (xsvnicp->is_rxbatching != flag) {
+               if (flag && !check_rxbatch_possible(xsvnicp, flag))
+                       return -EINVAL;
+
+               set_bit(XSVNIC_RXBATCH_CHANGE, &xsvnicp->state);
+               xsvnic_set_oper_down(xsvnicp, 1);
+               xsvnicp->is_rxbat_operational = flag;
+       }
+
+       return 1;
+}
+/*
+static int xsvnic_get_settings(struct net_device *netdev,
+                              struct ethtool_cmd *ecmd)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+       ecmd->autoneg = 0;
+       ecmd->speed = SPEED_1000;
+       ecmd->duplex = DUPLEX_FULL;
+       if (netif_carrier_ok(netdev)) {
+               if ((xsvnicp->port_speed == SPEED_1000) && xsvnic_report_10gbps)
+                       ecmd->speed = SPEED_10000;
+               else
+                       ecmd->speed = xsvnicp->port_speed;
+
+               if (ecmd->speed > SPEED_1000) {
+                       ecmd->advertising = ADVERTISED_10000baseT_Full;
+                       ecmd->supported = SUPPORTED_10000baseT_Full |
+                           SUPPORTED_FIBRE | SUPPORTED_Autoneg;
+                       ecmd->port = PORT_FIBRE;
+                       ecmd->transceiver = XCVR_EXTERNAL;
+               } else {
+                       ecmd->advertising = ADVERTISED_1000baseT_Full |
+                           ADVERTISED_100baseT_Full;
+                       ecmd->supported =
+                           SUPPORTED_10baseT_Full | SUPPORTED_10baseT_Half |
+                           SUPPORTED_100baseT_Full | SUPPORTED_100baseT_Half |
+                           SUPPORTED_1000baseT_Full | SUPPORTED_1000baseT_Half
+                           | SUPPORTED_TP | SUPPORTED_Autoneg;
+                       ecmd->transceiver = XCVR_INTERNAL;
+                       ecmd->port = PORT_TP;
+               }
+       }
+       return 0;
+}
+*/
+/*
+static int xsvnic_set_ringparam(struct net_device *netdev,
+                               struct ethtool_ringparam *ering)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+       if (ering->rx_pending >= 32
+           && ering->rx_pending <= ering->rx_max_pending)
+               xsvnicp->rx_ring_size = ering->rx_pending;
+
+       if (ering->tx_pending >= 32
+           && ering->tx_pending <= ering->tx_max_pending)
+               xsvnicp->tx_ring_size = ering->tx_pending;
+
+       set_bit(XSVNIC_RING_SIZE_CHANGE, &xsvnicp->state);
+       xsvnic_set_oper_down(xsvnicp, 1);
+       return 0;
+}
+*/
+/*
+static void xsvnic_get_ringparam(struct net_device *netdev,
+                                struct ethtool_ringparam *ering)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+
+       ering->rx_max_pending = 2048;
+       ering->rx_mini_max_pending = 0;
+       ering->rx_jumbo_max_pending = 384;
+       ering->rx_pending = xsvnicp->data_conn.ctx.rx_ring_size;
+       ering->rx_mini_pending = 0;
+       ering->rx_jumbo_pending = xsvnicp->data_conn.ctx.rx_ring_size;
+       ering->tx_max_pending = 2048;
+       ering->tx_pending = xsvnicp->data_conn.ctx.tx_ring_size;
+}
+*/
+/*
+static void xsvnic_get_drvinfo(struct net_device *netdev,
+                              struct ethtool_drvinfo *drvinfo)
+{
+       strncpy(drvinfo->driver, "xsvnic", 32);
+       strncpy(drvinfo->version, XSVNIC_DRIVER_VERSION, 32);
+       strncpy(drvinfo->fw_version, "N/A", 32);
+       strncpy(drvinfo->bus_info, "N/A", 32);
+}
+*/
+
+u32 xsvnic_op_get_rx_csum(struct net_device *dev)
+{
+       return (dev->features & NETIF_F_IP_CSUM) != 0;
+}
+
+int xsvnic_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+{
+       struct xsvnic *xsvnicp = netdev_priv(dev);
+
+       if (xsvnic_tx_intr_mode) {
+               coal->tx_coalesce_usecs =
+                   xsvnicp->data_conn.ctx.tx_coalesce_usecs;
+               coal->tx_max_coalesced_frames =
+                   xsvnicp->data_conn.ctx.tx_max_coalesced_frames;
+       }
+
+       if (xsvnic_rx_intr_mode) {
+               coal->rx_coalesce_usecs =
+                   xsvnicp->data_conn.ctx.rx_coalesce_usecs;
+               coal->rx_max_coalesced_frames =
+                   xsvnicp->data_conn.ctx.rx_max_coalesced_frames;
+       }
+
+       return 0;
+}
+
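+/*
+ * Interrupt coalescing can only be changed when the corresponding
+ * interrupt mode was enabled at load time (xsvnic_tx_intr_mode /
+ * xsvnic_rx_intr_mode); the new values are applied to the live CQs
+ * via xscore_modify_cq() and cached in the connection context.
+ */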
+int xsvnic_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
+{
+       struct xsvnic *xsvnicp = netdev_priv(dev);
+       u32 tx_usecs, tx_frames;
+       u32 rx_usecs, rx_frames;
+       u32 ret;
+       struct xscore_conn_ctx *ctx;
+
+       if (coal->rx_coalesce_usecs > 0xffff ||
+           coal->rx_max_coalesced_frames > 0xffff)
+               return -EINVAL;
+
+       ctx = &xsvnicp->data_conn.ctx;
+
+       tx_usecs = ctx->tx_coalesce_usecs;
+       tx_frames = ctx->tx_max_coalesced_frames;
+       rx_usecs = ctx->rx_coalesce_usecs;
+       rx_frames = ctx->rx_max_coalesced_frames;
+
+       /* Modify TX cq */
+       if (xsvnic_tx_intr_mode && ((tx_usecs != coal->tx_coalesce_usecs) ||
+                                   (tx_frames !=
+                                    coal->tx_max_coalesced_frames))) {
+               ret = xscore_modify_cq(ctx->scq, coal->tx_max_coalesced_frames,
+                                      coal->tx_coalesce_usecs);
+               if (ret && ret != -ENOSYS) {
+                       pr_info("failed modifying Send CQ (%d) vnic %s\n",
+                               ret, xsvnicp->vnic_name);
+                       return ret;
+               }
+
+               ctx->tx_coalesce_usecs = coal->tx_coalesce_usecs;
+               ctx->tx_max_coalesced_frames = coal->tx_max_coalesced_frames;
+       }
+
+       /* Modify RX cq */
+       if (xsvnic_rx_intr_mode && ((rx_usecs != coal->rx_coalesce_usecs) ||
+                                   (rx_frames !=
+                                    coal->rx_max_coalesced_frames))) {
+               ret = xscore_modify_cq(ctx->rcq, coal->rx_max_coalesced_frames,
+                                      coal->rx_coalesce_usecs);
+               if (ret && ret != -ENOSYS) {
+                       pr_err("failed modifying Recv CQ (%d) vnic %s\n",
+                              ret, xsvnicp->vnic_name);
+                       return ret;
+               }
+               ctx->rx_coalesce_usecs = coal->rx_coalesce_usecs;
+               ctx->rx_max_coalesced_frames = coal->rx_max_coalesced_frames;
+       }
+
+       return 0;
+}
+
+/*
+static struct ethtool_ops xsvnic_ethtool_ops = {
+       .get_settings = xsvnic_get_settings,
+       .get_drvinfo = xsvnic_get_drvinfo,
+       .get_link = ethtool_op_get_link,
+       .get_ringparam = xsvnic_get_ringparam,
+       .set_ringparam = xsvnic_set_ringparam,
+       .set_coalesce = xsvnic_set_coalesce,
+};
+*/
+
+static int xsvnic_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+       struct mii_ioctl_data *data = if_mii(ifr);
+       int ret = 0;
+       struct xsvnic *xsvnicp;
+
+       if (!netif_running(netdev))
+               return -EAGAIN;
+
+       xsvnicp = netdev_priv(netdev);
+       xsvnicp->counters[XSVNIC_IOCTL_COUNTER]++;
+
+       switch (cmd) {
+       case SIOCGMIIPHY:
+               data->phy_id = 5;
+               break;
+       case SIOCGMIIREG:
+               /*
+                * Mainly used by mii monitor
+                */
+               switch (data->reg_num) {
+               case 0:
+                       data->val_out = 0x2100;
+                       break;
+               case 1:
+                       data->val_out = 0xfe00 |
+                           (netif_carrier_ok(netdev) << 2);
+                       break;
+               default:
+                       break;
+               }
+               break;
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+       return ret;
+}
+
+/*
+ * Needs to be called with the spin lock held
+ */
+static void handle_qp_error(struct xsvnic *xsvnicp, int qp_error)
+{
+       pr_info("XSVNIC %s: Link Down (QP error %d)\n",
+               xsvnicp->vnic_name, qp_error);
+       xsvnicp->counters[XSVNIC_QP_ERROR_COUNTER]++;
+       xsvnic_set_oper_down(xsvnicp, 0);
+}
+
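+/*
+ * TX completions are polled rather than interrupt driven (by default
+ * the data QP is created with XSCORE_NO_SEND_COMPL_INTR), so finished
+ * send buffers are reaped here from the transmit path and from the
+ * heartbeat state machine. Any completion error marks the QP bad and
+ * takes the vnic operationally down.
+ */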
+static void xsvnic_reclaim_tx_buffers(struct xsvnic *xsvnicp)
+{
+       struct xscore_buf_info binfo;
+       int qp_error = 0;
+       /*
+        * Now reap completions
+        */
+       while (xscore_poll_send(&xsvnicp->data_conn.ctx, &binfo) > 0) {
+               CALC_MAX_MIN_TXTIME(xsvnicp, binfo.time_stamp);
+               xsvnicp->counters[XSVNIC_TX_SKB_FREE_COUNTER_REAP]++;
+               xsvnic_dev_kfree_skb_any(binfo.cookie);
+               if (binfo.status) {
+                       IOCTRL_INFO("VNIC: %s Data Send Completion error: %d\n",
+                                   xsvnicp->vnic_name, binfo.status);
+                       qp_error = binfo.status;
+               }
+       }
+       if (qp_error)
+               handle_qp_error(xsvnicp, qp_error);
+}
+
+int xsvnic_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       int ret = NETDEV_TX_OK;
+       int slen = skb->len;
+       unsigned long flags;
+       u8 skb_need_tofree = 0;
+
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+
+       /* Stop sending packet if standby interface */
+       if (xsvnicp->mp_flag
+           && unlikely(test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+               dev_kfree_skb_any(skb);
+               xsvnicp->counters[XSVNIC_TX_DROP_STANDBY_COUNTER]++;
+               goto out;
+       }
+
+       if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+               ret = NETDEV_TX_BUSY;
+               xsvnicp->stats.tx_dropped++;
+               xsvnicp->counters[XSVNIC_TX_DROP_OPER_DOWN_COUNT]++;
+               goto out;
+       }
+
+       if (skb->len < XSVNIC_MIN_PACKET_LEN) {
+               xsvnicp->counters[XSVNIC_SHORT_PKT_COUNTER]++;
+               if (skb_padto(skb, XSVNIC_MIN_PACKET_LEN)) {
+                       ret = NETDEV_TX_OK;
+                       xsvnicp->stats.tx_dropped++;
+                       xsvnicp->counters[XSVNIC_TX_SKB_ALLOC_ERROR_COUNTER]++;
+                       goto reclaim;
+               }
+               skb->len = XSVNIC_MIN_PACKET_LEN;
+       }
+       CALC_MAX_PKT_TX(xsvnicp, skb->len);
+       /*
+        * Check if it is a gso packet
+        */
+       if (xsvnicp->is_tso) {
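+               /*
+                * Each TSO frame is prefixed with a 4-byte pre-header
+                * (8 bytes when a VLAN tag is present).  Judging from
+                * the encodings below: bits 31:30 select the header
+                * type (01 plain, 11 with VLAN info), bits 27:16 hold
+                * the L4 data offset, bits 15:0 the MSS, and bit 28
+                * flags a non-GSO frame.
+                */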
+               int mss, hroom;
+               int doff = 0;
+               struct xs_tsovlan_header *hdrp;
+               u16 vid = 0;
+
+               if (skb_vlan_tag_present(skb)) {
+                       hroom = sizeof(struct xs_tsovlan_header);
+                       vid = skb_vlan_tag_get(skb);
+                       xsvnicp->counters[XSVNIC_TX_VLAN_COUNTER]++;
+               } else {
+                       hroom = sizeof(struct xs_tso_header);
+               }
+               if (unlikely(skb_headroom(skb) < hroom)) {
+                       if (skb_cow(skb, hroom) < 0) {
+                               xsvnicp->stats.tx_dropped++;
+                               xsvnicp->counters[XSVNIC_TX_EXPANDSKB_ERROR]++;
+                               skb_need_tofree = 1;
+                               goto free_skb;
+                       }
+                       xsvnicp->counters[XSVNIC_TX_SKB_NOHEAD_COUNTER]++;
+               }
+
+               mss = skb_is_gso(skb);
+               if (mss) {
+                       if (skb_header_cloned(skb)) {
+                               xsvnicp->counters
+                                   [XSVNIC_TX_EXPAND_HEAD_COUNTER]++;
+                               ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+                               if (ret) {
+                                       xsvnicp->counters
+                                           [XSVNIC_TX_EXPAND_HEAD_ECNTR]++;
+                                       skb_need_tofree = 1;
+                                       goto free_skb;
+                               }
+                       }
+                       hdrp = (struct xs_tsovlan_header *)skb_push(skb, hroom);
+                       /*
+                        * Encode the MSS and the data offset into the
+                        * 4-byte pre-header
+                        */
+                       doff =
+                           skb_transport_offset(skb) + tcp_hdrlen(skb) - hroom;
+                       xsvnicp->counters[XSVNIC_TX_SKB_TSO_COUNTER]++;
+                       hdrp->tso_info =
+                           cpu_to_be32((1 << 30) | (doff << 16) | mss);
+               } else {
+                       hdrp = (struct xs_tsovlan_header *)skb_push(skb, hroom);
+                       hdrp->tso_info = cpu_to_be32((1 << 30) | (1 << 28));
+               }
+
+               if (vid) {
+                       hdrp->vlan_info = cpu_to_be32(vid);
+                       /* keep tso_info big-endian while setting the
+                        * VLAN-present type bits
+                        */
+                       hdrp->tso_info =
+                           cpu_to_be32(be32_to_cpu(hdrp->tso_info) |
+                                       (3 << 30));
+               }
+       }
+       /*
+        * Spin lock has to be released for soft-HCA to work correctly
+        */
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       ret = xscore_post_send_sg(&xsvnicp->data_conn.ctx, skb, 0);
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (ret) {
+               if (ret == -ENOBUFS) {
+                       xsvnicp->stats.tx_dropped++;
+                       xsvnicp->counters[XSVNIC_TX_RING_FULL_COUNTER]++;
+               } else {
+                       handle_qp_error(xsvnicp, ret);
+               }
+               ret = NETDEV_TX_OK;
+               skb_need_tofree = 1;
+               goto free_skb;
+       }
+       netdev->trans_start = jiffies;
+       xsvnicp->send_hbeat_flag = 0;
+       xsvnicp->stats.tx_packets++;
+       xsvnicp->stats.tx_bytes += slen;
+       xsvnicp->counters[XSVNIC_TX_COUNTER]++;
+
+free_skb:
+       if (skb_need_tofree)
+               dev_kfree_skb(skb);
+
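+       /*
+        * In polled (non-interrupt) TX mode, send completions are
+        * reaped lazily, once every xsvnic_reclaim_count transmits;
+        * the reclaim label is also the direct target of the
+        * skb_padto() failure path above.
+        */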
+       if (!xsvnic_tx_intr_mode
+           && (xsvnicp->reclaim_count++ > xsvnic_reclaim_count)) {
+reclaim:
+               xsvnicp->reclaim_count = 0;
+               xsvnic_reclaim_tx_buffers(xsvnicp);
+       }
+out:
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+
+       return ret;
+}
+
+static inline void xsvnic_untag_vlan(struct xsvnic *xsvnicp,
+                                    struct sk_buff *skb, u16 *vlan_tci)
+{
+       struct ethhdr *eh = (struct ethhdr *)(skb->data);
+
+       if (eh->h_proto == htons(ETH_P_8021Q)) {
+               struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+               /*
+                * Grab VLAN information and TCI fields and populate SKB
+                * Strip the vlan tag
+                */
+               *vlan_tci = be16_to_cpu(veth->h_vlan_TCI);
+               memmove((u8 *) eh + VLAN_HLEN, eh, ETH_ALEN * 2);
+               skb_pull(skb, VLAN_HLEN);
+       }
+}
+
+static inline void xsvnic_verify_checksum(struct xsvnic *xsvnicp,
+                                         struct sk_buff *skb, int sz)
+{
+       u32 trailer;
+
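+       /*
+        * Each received frame appears to carry a 4-byte status
+        * trailer from the gateway: an IPv4 flag, TCP/UDP
+        * checksum-good bits, an IP-fragment bit and, for fragments,
+        * a 16-bit partial checksum in the upper half.
+        */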
+       if (xsvnic_force_csum_offload) {
+               xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_COUNTER]++;
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+               skb_trim(skb, sz - sizeof(int));
+               return;
+       } else
+               skb->ip_summed = CHECKSUM_NONE;
+
+       trailer = be32_to_cpu(*(u32 *) ((u8 *) skb->data + sz - 4));
+
+       skb_trim(skb, sz - sizeof(int));
+
+       if (!(trailer & XSIGO_IPV4_BIT)) {
+               xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_NONIPV4_COUNTER]++;
+               return;
+       }
+
+       if (trailer & (XSIGO_TCP_CHKSUM_GOOD_BIT | XSIGO_UDP_CHKSUM_GOOD_BIT)) {
+               xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_COUNTER]++;
+               if (trailer & XSIGO_IP_FRAGMENT_BIT) {
+                       skb->csum = (trailer >> 16);
+                       skb->ip_summed = CHECKSUM_PARTIAL;
+                       xsvnicp->counters[XSVNIC_RX_SKB_OFFLOAD_FRAG_COUNTER]++;
+               } else
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+       }
+}
+
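+/*
+ * Each segment of a batched receive buffer is prefixed by a 4-byte
+ * header encoding the fragment length and a final-segment bit;
+ * return a pointer to the start of the segment payload.
+ */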
+char *xsvnic_get_rxbat_pkts(struct xsvnic *xsvnicp, int *curr_seg_len,
+                           char *start, char *is_last_pkt, int total_pkt_len)
+{
+       int rxbat_hdr = be32_to_cpu(*(u32 *) start);
+       *curr_seg_len = RXBAT_FRAG_LEN(rxbat_hdr);
+       *is_last_pkt = (RXBAT_FINAL_BIT(rxbat_hdr) ? 1 : 0);
+       return start + XS_RXBAT_HDRLEN;
+}
+
+void xsvnic_send_skb(struct xsvnic *xsvnicp, struct sk_buff *skb,
+                    int curr_pkt_len, char chksum_offload)
+{
+       struct net_device *netdev = xsvnicp->netdev;
+       u16 vlan_tci = 0xFFFF;
+
+       skb->dev = netdev;
+       if ((netdev->features & NETIF_F_IP_CSUM) && chksum_offload)
+               xsvnic_verify_checksum(xsvnicp, skb, curr_pkt_len);
+       else
+               skb->ip_summed = CHECKSUM_NONE;
+       /*
+        * Software based VLAN acceleration enabled, so process it
+        */
+       if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
+               xsvnic_untag_vlan(xsvnicp, skb, &vlan_tci);
+
+       skb->protocol = eth_type_trans(skb, netdev);
+       xsvnicp->stats.rx_packets++;
+       xsvnicp->stats.rx_bytes += curr_pkt_len;
+       CALC_MAX_PKT_RX(xsvnicp, skb->len);
+       /* Enable dumping packets on Demand */
+       XSIGO_DUMP_PKT(skb->data, skb->len, "xsvnic_process_rx_skb");
+       /*
+        * Check if it is HA and standby and drop the packet
+        */
+       if (xsvnicp->mp_flag
+           && unlikely(test_bit(XSVNIC_STATE_STDBY, &xsvnicp->state))) {
+               dev_kfree_skb_any(skb);
+               xsvnicp->counters[XSVNIC_RX_DROP_STANDBY_COUNTER]++;
+       } else {
+               if (xsvnic_vlanaccel && (vlan_tci != 0xFFFF)) {
+                       __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+                                              vlan_tci);
+                       xsvnicp->counters[XSVNIC_RX_SENDTO_VLANGRP]++;
+               }
+
+               if (netdev->features & NETIF_F_LRO)
+                       lro_receive_skb(&xsvnicp->lro.lro_mgr, skb, NULL);
+               else
+                       netif_receive_skb(skb);
+       }
+       netdev->last_rx = jiffies;
+}
+
+void xsvnic_count_segs(struct xsvnic *xsvnicp, char nr_segs, int pkt_len)
+{
+       if (nr_segs > 1) {
+               xsvnicp->counters[XSVNIC_RXBAT_PKTS]++;
+               if (nr_segs <= 5)
+                       xsvnicp->counters[XSVNIC_RXBAT_BELOW_5SEGS]++;
+               else if (nr_segs > 5 && nr_segs <= 10)
+                       xsvnicp->counters[XSVNIC_RXBAT_BTW_5_10SEGS]++;
+               else if (nr_segs > 10 && nr_segs <= 20)
+                       xsvnicp->counters[XSVNIC_RXBAT_BTW_10_20SEGS]++;
+               else
+                       xsvnicp->counters[XSVNIC_RXBAT_ABOVE_20SEGS]++;
+
+               if (nr_segs > xsvnicp->counters[XSVNIC_RX_MAXBATED_COUNTER])
+                       xsvnicp->counters[XSVNIC_RX_MAXBATED_COUNTER] = nr_segs;
+       }
+       if (pkt_len > PAGE_SIZE)
+               xsvnicp->counters[XSVNIC_8KBAT_PKTS]++;
+}
+
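+/*
+ * Round *start up to the next 4-byte boundary and return the number
+ * of bytes skipped.
+ */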
+int xsvnic_align_addr(char **start)
+{
+       int align_diff;
+       char *align_addr = (char *)((unsigned long)(*start + 3) & ~0x3);
+
+       align_diff = align_addr - *start;
+       *start = align_addr;
+       return align_diff;
+}
+
+void xsvnic_process_rx_skb(struct xsvnic *xsvnicp,
+                          struct xscore_buf_info *binfo)
+{
+       struct sk_buff *skb;
+       int tot_pkt_len;
+
+       tot_pkt_len = binfo->sz;
+       skb = binfo->cookie;
+       skb_put(skb, tot_pkt_len);
+       xsvnic_send_skb(xsvnicp, skb, tot_pkt_len, 1);
+}
+
+int xsvnic_poll(struct napi_struct *napi, int budget)
+{
+       struct xsvnic *xsvnicp = container_of(napi, struct xsvnic, napi);
+       struct xscore_conn_ctx *ctx = &xsvnicp->data_conn.ctx;
+       struct xscore_buf_info binfo;
+       int ret, done = 0, qp_error = 0;
+       unsigned long flags;
+
+       /*
+        * If not connected complete it
+        */
+       xsvnicp->counters[XSVNIC_NAPI_POLL_COUNTER]++;
+       if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state)) {
+               napi_complete(&xsvnicp->napi);
+               clear_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+               return 0;
+       }
+again:
+       while (done < budget) {
+               ret = xscore_read_buf(ctx, &binfo);
+               if (ret != 1 || binfo.status) {
+                       if (binfo.status) {
+                               qp_error = 1;
+                               handle_qp_error(xsvnicp, binfo.status);
+                               xsvnic_process_rbuf_error(xsvnicp, &binfo);
+                       }
+                       break;
+               }
+
+               if (xsvnicp->is_rxbatching)
+                       xsvnic_process_pages(xsvnicp, &binfo);
+               else
+                       xsvnic_process_rx_skb(xsvnicp, &binfo);
+
+               xsvnicp->counters[XSVNIC_RX_SKB_COUNTER]++;
+               done++;
+       }
+
+       napi_update_budget(&xsvnicp->napi, done);
+
+       if (!qp_error && !test_bit(XSVNIC_RX_NOBUF, &xsvnicp->state)) {
+               if (xscore_refill_recv(&xsvnicp->data_conn.ctx, GFP_ATOMIC)) {
+                       xsvnicp->counters[XSVNIC_RX_NOBUF_COUNTER]++;
+                       set_bit(XSVNIC_RX_NOBUF, &xsvnicp->state);
+               }
+       }
+       if (done < budget) {
+               if (xsvnicp->netdev->features & NETIF_F_LRO)
+                       lro_flush_all(&xsvnicp->lro.lro_mgr);
+               napi_complete(&xsvnicp->napi);
+               clear_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+       } else {
+               set_bit(XSVNIC_OVER_QUOTA, &xsvnicp->state);
+               xsvnicp->counters[XSVNIC_RX_QUOTA_EXCEEDED_COUNTER]++;
+               return done;
+       }
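+       /*
+        * Re-arm the RX interrupt and close the race where completions
+        * arrived between the final poll and the re-arm: a nonzero
+        * return from xscore_enable_rxintr() is taken to mean missed
+        * events, so NAPI is rescheduled and polling resumes.
+        */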
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       if (test_bit(XSVNIC_OS_ADMIN_UP, &xsvnicp->state) &&
+           test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state) &&
+           test_bit(XSVNIC_OPER_UP, &xsvnicp->state) &&
+           !test_bit(XSVNIC_DELETING, &xsvnicp->state)) {
+               set_bit(XSVNIC_INTR_ENABLED, &xsvnicp->state);
+               if (xscore_enable_rxintr(ctx)) {
+                       if (napi_reschedule(&xsvnicp->napi)) {
+                               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+                               goto again;
+                       }
+               }
+       }
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       return done;
+}
+
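+/*
+ * LRO callback: locate the IPv4 and TCP headers of a received skb so
+ * the inet_lro engine can aggregate it; anything that is not a
+ * complete TCP/IPv4 packet is rejected with -1.
+ */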
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+                      void **tcph, u64 *hdr_flags, void *xsvnicp)
+{
+       unsigned int ip_len;
+       struct iphdr *iph;
+
+       if (unlikely(skb->protocol != htons(ETH_P_IP)))
+               return -1;
+
+       /* Check for non-TCP packet */
+       skb_reset_network_header(skb);
+       iph = ip_hdr(skb);
+       if (iph->protocol != IPPROTO_TCP)
+               return -1;
+
+       ip_len = ip_hdrlen(skb);
+       skb_set_transport_header(skb, ip_len);
+       *tcph = tcp_hdr(skb);
+
+       /* check if IP header and TCP header are complete */
+       if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+               return -1;
+
+       *hdr_flags = LRO_IPV4 | LRO_TCP;
+       *iphdr = iph;
+
+       return 0;
+}
+
+static void xsvnic_lro_setup(struct xsvnic *xsvnicp)
+{
+       xsvnicp->lro.lro_mgr.max_aggr = lro_max_aggr;
+       xsvnicp->lro.lro_mgr.max_desc = XSVNIC_MAX_LRO_DESCRIPTORS;
+       xsvnicp->lro.lro_mgr.lro_arr = xsvnicp->lro.lro_desc;
+       xsvnicp->lro.lro_mgr.get_skb_header = get_skb_hdr;
+       xsvnicp->lro.lro_mgr.features = LRO_F_NAPI;
+       xsvnicp->lro.lro_mgr.dev = xsvnicp->netdev;
+       xsvnicp->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
+static struct net_device_ops xsvnic_netdev_ops = {
+       .ndo_open = xsvnic_open,
+       .ndo_stop = xsvnic_stop,
+       .ndo_start_xmit = xsvnic_start_xmit,
+       .ndo_get_stats = xsvnic_get_stats,
+       .ndo_set_rx_mode = xsvnic_set_multicast,
+       .ndo_change_mtu = xsvnic_change_mtu,
+       .ndo_set_mac_address = xsvnic_set_mac_address,
+       .ndo_do_ioctl = xsvnic_ioctl,
+       .ndo_tx_timeout = xsvnic_tx_timeout,
+       .ndo_vlan_rx_add_vid = xsvnic_vlan_rx_add_vlanid,
+       .ndo_vlan_rx_kill_vid = xsvnic_vlan_rx_kill_vlanid
+};
+
+static int setup_netdev_info(struct net_device *netdev)
+{
+       struct xsvnic *xsvnicp = netdev_priv(netdev);
+       struct ib_device *hca = xsvnicp->xsmp_info.ib_device;
+       u64 mac;
+
+       netdev->watchdog_timeo = 10 * HZ;
+       netdev->tx_queue_len = xsvnic_tx_queue_len;
+       netdev->features |=
+           NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_SG | NETIF_F_GSO |
+           NETIF_F_GRO;
+       if (xsvnic_highdma)
+               netdev->features |= NETIF_F_HIGHDMA;
+       if (xsvnic_vlanaccel) {
+               pr_info("XSVNIC:%s Enabling vlan offloading ", __func__);
+               pr_info("[xsvnic %s]\n", xsvnicp->vnic_name);
+               netdev->features |= NETIF_F_HW_VLAN_CTAG_RX;
+       }
+       if (lro)
+               xsvnicp->lro_mode = 1;
+       /*
+        * Set up the TSO flag based on the install_flag setting.
+        * Checksum and SG must also be enabled by default in the
+        * TSO case (NETIF_F_HW_VLAN_TX | NETIF_F_TSO).
+        */
+       if (xsvnicp->install_flag & (XSVNIC_INSTALL_TCP_OFFL |
+                                    XSVNIC_INSTALL_UDP_OFFL)
+           || xsvnic_force_csum_offload)
+               netdev->features |= NETIF_F_IP_CSUM;
+
+       if (xsvnicp->lro_mode) {
+               xsvnic_lro_setup(xsvnicp);
+               netdev->features |= NETIF_F_LRO;
+       }
+       xg_setup_pseudo_device(netdev, hca);
+
+       SET_NETDEV_OPS(netdev, &xsvnic_netdev_ops);
+       mac = be64_to_cpu(xsvnicp->mac);
+       memcpy(netdev->dev_addr, (u8 *) (&mac) + 2, ETH_ALEN);
+       netif_napi_add(netdev, &xsvnicp->napi, xsvnic_poll, napi_weight);
+       if (xsvnic_esx_preregister_setup(netdev))
+               return -EINVAL;
+       return register_netdev(netdev);
+}
+
+struct xsvnic *xsvnic_get_xsvnic_by_vid(u64 resource_id)
+{
+       struct xsvnic *xsvnicp;
+
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (xsvnicp->resource_id == resource_id)
+                       return xsvnicp;
+       }
+       return NULL;
+}
+
+struct xsvnic *xsvnic_get_xsvnic_by_name(char *vnic_name)
+{
+       struct xsvnic *xsvnicp;
+
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (strcmp(xsvnicp->vnic_name, vnic_name) == 0)
+                       return xsvnicp;
+       }
+       return NULL;
+}
+
+/*
+ * Handle install message
+ */
+
+static int xsvnic_xsmp_install(xsmp_cookie_t xsmp_hndl,
+                              struct xsvnic_xsmp_msg *xmsgp, void *data,
+                              int len)
+{
+       struct net_device *netdev;
+       struct xsvnic *xsvnicp;
+       u16 mp_flag;
+       char vnic_name[128];
+       int ret = 0;
+       u64 m;
+       u8 update_state = 0;
+       u8 ecode = 0;
+       u8 is_ha = 0;
+
+       XSMP_FUNCTION("%s:\n", __func__);
+
+       xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (xsvnicp) {
+               /*
+                * Duplicate VID, send ACK, send oper state update
+                */
+               XSMP_ERROR("%s: Duplicate VNIC install message name", __func__);
+               XSMP_ERROR(",: %s, ", xmsgp->vnic_name);
+               XSMP_ERROR("VID=0x%llx\n", be64_to_cpu(xmsgp->resource_id));
+               ret = -EEXIST;
+               clear_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state);
+               update_state = 1;
+               xsvnicp->xsmp_hndl = xsmp_hndl;
+               xsvnic_update_tca_info(xsvnicp, xmsgp, 0);
+               goto send_ack;
+       }
+
+       XSMP_INFO("Installing VNIC : %s, VID=0x%llx\n",
+                 xmsgp->vnic_name, be64_to_cpu(xmsgp->resource_id));
+
+       mp_flag = be16_to_cpu(xmsgp->mp_flag);
+       /*
+        * Append _P and _S to HA vnic names
+        */
+       strncpy(vnic_name, xmsgp->vnic_name, sizeof(vnic_name) - 1);
+       vnic_name[sizeof(vnic_name) - 1] = '\0';
+       if (mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+               if (xsvnic_havnic) {
+                       char *pos;
+
+                       strcpy(vnic_name, xmsgp->mp_group);
+
+                       pos = strchr(vnic_name, '.');
+                       if (pos != 0)
+                               *pos = 0;
+                       is_ha = 1;
+                       strncpy(xmsgp->vnic_name, vnic_name,
+                               sizeof(xmsgp->vnic_name) - 1);
+                       if (mp_flag & MP_XSVNIC_PRIMARY)
+                               strcat(vnic_name, "_P");
+                       else
+                               strcat(vnic_name, "_S");
+               } else {
+                       pr_warn("XSVNIC: %s HA vnic not ", xmsgp->vnic_name);
+                       pr_warn("supported\n");
+                       ret = -EINVAL;
+                       ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+                       goto dup_error;
+               }
+       }
+
+       if (xcpm_check_duplicate_names(xsmp_hndl, vnic_name,
+                                      XSMP_MESSAGE_TYPE_XVE) != 0) {
+               pr_info("%s Duplicate name %s\n", __func__, vnic_name);
+               ret = -EEXIST;
+               ecode = XSVNIC_NACK_DUP_NAME;
+               goto dup_error;
+       }
+
+       xsvnicp = xsvnic_get_xsvnic_by_name(vnic_name);
+       if (xsvnicp) {
+               XSMP_ERROR("%s: Duplicate name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->vnic_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               ret = -EEXIST;
+               ecode = XSVNIC_NACK_DUP_NAME;
+               goto dup_error;
+       }
+       /*
+        * Check for the long name vnic
+        */
+       if (strlen(vnic_name) > XSVNIC_VNIC_NAMELENTH) {
+               pr_err("XSVNIC: vnic_name %s,", xmsgp->vnic_name);
+               pr_err("length > 15 not supported\n");
+               ret = -EINVAL;
+               ecode = XSVNIC_NACK_INVALID;
+               goto dup_error;
+       }
+
+       netdev = alloc_netdev(sizeof(*xsvnicp), vnic_name, NET_NAME_UNKNOWN,
+                             &xsvnic_setup);
+       if (netdev == NULL) {
+               XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->vnic_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               ret = -ENOMEM;
+               ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+               goto dup_error;
+       }
+       xsvnicp = netdev_priv(netdev);
+       memset(xsvnicp, 0, sizeof(*xsvnicp));
+       xsvnicp->netdev = netdev;
+       INIT_LIST_HEAD(&xsvnicp->vlan_list);
+       INIT_LIST_HEAD(&xsvnicp->xsvnic_list);
+       init_completion(&xsvnicp->done);
+       mutex_init(&xsvnicp->mutex);
+       spin_lock_init(&xsvnicp->lock);
+       xsvnicp->resource_id = be64_to_cpu(xmsgp->resource_id);
+       xsvnicp->bandwidth = be16_to_cpu(xmsgp->vn_admin_rate);
+       m = xmsgp->mac_high;
+       xsvnicp->mac = m << 32 | xmsgp->mac_low;
+       memcpy(xsvnicp->vnic_name, vnic_name, XSVNIC_MAX_NAME_SIZE - 1);
+       xsvnicp->vnic_name[XSVNIC_MAX_NAME_SIZE - 1] = 0;
+       memcpy(xsvnicp->mp_group, xmsgp->mp_group, XSVNIC_MAX_NAME_SIZE - 1);
+       xsvnicp->mp_group[XSVNIC_MAX_NAME_SIZE - 1] = 0;
+       xsvnicp->sl = be16_to_cpu(xmsgp->service_level);
+       xsvnicp->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+       xsvnicp->install_flag = be32_to_cpu(xmsgp->install_flag);
+       xsvnicp->mtu = be16_to_cpu(xmsgp->vn_mtu);
+       xsvnicp->tca_guid = be64_to_cpu(xmsgp->tca_guid);
+       xsvnicp->tca_lid = be16_to_cpu(xmsgp->tca_lid);
+       xsvnicp->xsmp_hndl = xsmp_hndl;
+       xcpm_get_xsmp_session_info(xsmp_hndl, &xsvnicp->xsmp_info);
+
+       /*
+        * In case of Non-HA set state to ACTIVE
+        */
+       if (!is_ha)
+               xsvnicp->ha_state = XSVNIC_HA_STATE_ACTIVE;
+       /*
+        * If the MTU is jumbo, or if it is LLE, use the default ring
+        * sizes (both branches currently use the same values)
+        */
+       if (xsvnicp->mtu > 1518 || !xsvnicp->xsmp_info.is_shca) {
+               xsvnicp->rx_ring_size = xsvnic_rxring_size;
+               xsvnicp->tx_ring_size = xsvnic_txring_size;
+       } else {
+               xsvnicp->rx_ring_size = xsvnic_rxring_size;
+               xsvnicp->tx_ring_size = xsvnic_txring_size;
+       }
+
+       netdev->mtu = xsvnicp->mtu;
+       /*
+        * Always set chassis ADMIN up by default
+        */
+       set_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+       set_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP, &xsvnicp->state);
+
+       INIT_DELAYED_WORK(&xsvnicp->sm_work, xsvnic_state_machine_work);
+
+       if (xsvnicp->install_flag & XSVNIC_INSTALL_TSO) {
+               xsvnicp->is_tso = 1;
+               /* BUG 22267 */
+               /* xsvnicp->lro_mode = 1; */
+               /*
+                * Add additional 8 bytes data for TSO header
+                */
+               netdev->hard_header_len += 8;
+               netdev->features |= NETIF_F_TSO;
+       }
+
+       if (xsvnic_conn_init(xsvnicp)) {
+               XSMP_ERROR("%s: xsvnic_conn_init error name: %s, VID=0x%llx\n",
+                          __func__, vnic_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+               goto proc_error;
+       }
+
+       ret = xsvnic_add_proc_entry(xsvnicp);
+       if (ret) {
+               XSMP_ERROR("%s: procfs error name: %s, VID=0x%llx\n",
+                          __func__, vnic_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               goto proc_error;
+       }
+
+       ret = setup_netdev_info(netdev);
+       if (ret) {
+               XSMP_ERROR("%s: setup_netdev_info error name: ,", __func__);
+               XSMP_ERROR("%s VID=0x%llx ret %x\n",
+                          vnic_name, be64_to_cpu(xmsgp->resource_id), ret);
+               ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+               goto setup_netdev_info_error;
+       }
+
+       netif_carrier_off(netdev);
+       netif_stop_queue(netdev);
+
+       if (xsvnic_esx_postregister_setup(netdev)) {
+               ecode = XSVNIC_NACK_ALLOCATION_ERROR;
+               goto post_reg_err;
+       }
+       /*
+        * Add it to the list, mutex held for all XSMP processing
+        */
+       list_add_tail(&xsvnicp->xsvnic_list, &xsvnic_list);
+       pr_info("Installed XSVNIC vnic %s, ", vnic_name);
+       pr_info("VID=0x%llx, tca_guid: 0x%llx, tca lid: 0x%x tso %d\n",
+               xsvnicp->resource_id, xsvnicp->tca_guid,
+               xsvnicp->tca_lid, xsvnicp->is_tso);
+       /*
+        * Send ADMIN down and OPER down
+        */
+       xsvnic_send_msg_to_xsigod(xsmp_hndl, data, len);
+       atomic_inc(&xsvnicp->ref_cnt);
+       xsvnicp->sm_delay = 1000;
+       queue_sm_work(xsvnicp, 0);
+       /*
+        * Send ACK
+        */
+send_ack:
+       ret = xsvnic_xsmp_send_ack(xsmp_hndl, xmsgp);
+       if (ret) {
+               XSMP_ERROR
+                   ("%s: xsvnic_xsmp_send_ack error name: %s, VID=0x%llx\n",
+                    __func__, xmsgp->vnic_name,
+                    be64_to_cpu(xmsgp->resource_id));
+       }
+       if (update_state)
+               xsvnic_update_oper_state(xsvnicp);
+
+       return 0;
+
+post_reg_err:
+       unregister_netdev(netdev);
+setup_netdev_info_error:
+       xsvnic_remove_proc_entry(xsvnicp);
+proc_error:
+       free_netdev(netdev);
+dup_error:
+       (void)xsvnic_xsmp_send_nack(xsmp_hndl, xmsgp, sizeof(*xmsgp), ecode);
+       return ret;
+}
+
+static int xsvnic_remove_vnic(struct xsvnic *xsvnicp)
+{
+       struct vlan_entry *vlan, *tvlan;
+
+       mutex_lock(&xsvnicp->mutex);
+       xsvnic_io_disconnect(xsvnicp);
+       mutex_unlock(&xsvnicp->mutex);
+
+       xsvnic_put_ctx(xsvnicp);
+       /*
+        * Wait for the reference count to go to zero
+        */
+       while (atomic_read(&xsvnicp->ref_cnt)) {
+               DRV_ERROR("%s: Waiting for refcnt to become zero %d\n",
+                         __func__, atomic_read(&xsvnicp->ref_cnt));
+               msleep(100);
+       }
+       mutex_lock(&xsvnic_mutex);
+       list_del(&xsvnicp->xsvnic_list);
+       mutex_unlock(&xsvnic_mutex);
+       vmk_notify_uplink(xsvnicp->netdev);
+       unregister_netdev(xsvnicp->netdev);
+       pr_info("XSVNIC: %s deleted\n", xsvnicp->vnic_name);
+       xscore_conn_destroy(&xsvnicp->ctrl_conn.ctx);
+       xscore_conn_destroy(&xsvnicp->data_conn.ctx);
+       list_for_each_entry_safe(vlan, tvlan, &xsvnicp->vlan_list, vlan_list) {
+               list_del(&vlan->vlan_list);
+               kfree(vlan);
+       }
+
+       kfree(xsvnicp->mc_addrs);
+
+       xsvnic_remove_proc_entry(xsvnicp);
+       if (!test_bit(XSVNIC_SHUTDOWN, &xsvnicp->state)) {
+               if (xsvnicp->mp_flag &
+                   (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+                       /*
+                        * Punt the message to xsigod to handle
+                        */
+                       xsvnic_send_cmd_to_xsigod(xsvnicp, XSMP_XSVNIC_DELETE);
+               }
+               /*
+                * Ideally need to figure out why userspace ACK is not working
+                */
+               xsvnic_xsmp_send_notification(xsvnicp->xsmp_hndl,
+                                             xsvnicp->resource_id,
+                                             XSMP_XSVNIC_DELETE);
+       }
+       free_netdev(xsvnicp->netdev);
+       return 0;
+}
+
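+/*
+ * Chassis admin-state changes are not applied directly: the shadow
+ * bit records the requested state and XSVNIC_SEND_ADMIN_STATE tells
+ * the state-machine worker to act on it, presumably once the QPs
+ * have been quiesced.
+ */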
+static int handle_admin_state_change(struct xsvnic *xsvnicp,
+                                    struct xsvnic_xsmp_msg *xmsgp)
+{
+       if (xmsgp->admin_state) {
+               XSMP_INFO("%s: VNIC %s Admin state up message\n", __func__,
+                         xsvnicp->vnic_name);
+               if (!test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state)) {
+                       xsvnicp->counters[XSVNIC_ADMIN_UP_COUNTER]++;
+                       set_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP,
+                               &xsvnicp->state);
+                       set_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+               }
+       } else {                /* Admin Down */
+               XSMP_INFO("%s: VNIC %s Admin state down message\n",
+                         __func__, xsvnicp->vnic_name);
+               if (test_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state)) {
+                       xsvnicp->counters[XSVNIC_ADMIN_DOWN_COUNTER]++;
+                       clear_bit(XSVNIC_CHASSIS_ADMIN_UP, &xsvnicp->state);
+                       clear_bit(XSVNIC_CHASSIS_ADMIN_SHADOW_UP,
+                                 &xsvnicp->state);
+                       set_bit(XSVNIC_SEND_ADMIN_STATE, &xsvnicp->state);
+               }
+       }
+       return 0;
+}
+
+static void xsvnic_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl,
+                                       u64 resource_id)
+{
+       struct xsvnic *xsvnicp;
+
+       xsvnicp = xsvnic_get_xsvnic_by_vid(resource_id);
+       if (!xsvnicp) {
+               XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+                          __func__, resource_id);
+               return;
+       }
+       XSMP_INFO("VNIC: %s Oper Req from chassis\n", xsvnicp->vnic_name);
+       xsvnicp->counters[XSVNIC_OPER_REQ_COUNTER]++;
+       xsvnic_xsmp_send_oper_state(xsvnicp, resource_id,
+                                   test_bit(XSVNIC_OPER_UP, &xsvnicp->state)
+                                   ? XSMP_XSVNIC_OPER_UP :
+                                   XSMP_XSVNIC_OPER_DOWN);
+}
+
+static void xsvnic_update_tca_info(struct xsvnic *xsvnicp,
+                                  struct xsvnic_xsmp_msg *xmsgp,
+                                  int set_oper_down)
+{
+       /*
+        * Ignore invalid tca info
+        */
+       if (be64_to_cpu(xmsgp->tca_guid) == 0
+           || be16_to_cpu(xmsgp->tca_lid) == 0)
+               return;
+       if (xsvnicp->tca_guid != be64_to_cpu(xmsgp->tca_guid) ||
+           xsvnicp->tca_lid != be16_to_cpu(xmsgp->tca_lid)) {
+               xsvnicp->counters[XSVNIC_XT_LID_CHANGE_COUNTER]++;
+               pr_info("XSVNIC %s TCA id changed from", xsvnicp->vnic_name);
+               pr_info("(0x%Lx:0x%d) to (0x%Lx:0x%d)\n",
+                       xsvnicp->tca_guid,
+                       xsvnicp->tca_lid,
+                       be64_to_cpu(xmsgp->tca_guid),
+                       be16_to_cpu(xmsgp->tca_lid));
+               xsvnicp->tca_guid = be64_to_cpu(xmsgp->tca_guid);
+               xsvnicp->tca_lid = be16_to_cpu(xmsgp->tca_lid);
+               xsvnicp->ctrl_conn.ctx.dguid = xsvnicp->tca_guid;
+               xsvnicp->data_conn.ctx.dguid = xsvnicp->tca_guid;
+               xsvnicp->ctrl_conn.ctx.dlid = xsvnicp->tca_lid;
+               xsvnicp->data_conn.ctx.dlid = xsvnicp->tca_lid;
+               if (set_oper_down)
+                       xsvnic_set_oper_down(xsvnicp, 1);
+       }
+}
+
+static int xsvnic_xsmp_update(xsmp_cookie_t xsmp_hndl,
+                             struct xsvnic_xsmp_msg *xmsgp)
+{
+       u32 bitmask = be32_to_cpu(xmsgp->bitmask);
+       struct xsvnic *xsvnicp;
+       int ret = 0;
+       int send_ack = 1;
+
+       xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!xsvnicp) {
+               XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+                          __func__, be64_to_cpu(xmsgp->resource_id));
+               return -EINVAL;
+       }
+
+       XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__,
+                 xsvnicp->vnic_name, bitmask);
+
+       mutex_lock(&xsvnicp->mutex);
+
+       if (bitmask & XSVNIC_UPDATE_ADMIN_STATE) {
+               ret = handle_admin_state_change(xsvnicp, xmsgp);
+               /*
+                * Ack will be sent once QP's are brought down
+                */
+               send_ack = 0;
+       }
+
+       if (bitmask & XSVNIC_XT_STATE_DOWN) {
+               XSMP_INFO("%s: VNIC %s XT state down message\n",
+                         __func__, xsvnicp->vnic_name);
+               xsvnicp->counters[XSVNIC_XT_DOWN_COUNTER]++;
+               set_bit(XSVNIC_XT_DOWN, &xsvnicp->state);
+               xsvnic_set_oper_down(xsvnicp, 1);
+       }
+
+       if (bitmask & XSVNIC_UPDATE_XT_CHANGE) {
+               XSMP_INFO("%s: VNIC %s XT state change message\n",
+                         __func__, xsvnicp->vnic_name);
+               xsvnicp->counters[XSVNIC_XT_UPDATE_COUNTER]++;
+               xsvnic_update_tca_info(xsvnicp, xmsgp, 1);
+               clear_bit(XSVNIC_XT_DOWN, &xsvnicp->state);
+       }
+
+       if (send_ack && xsvnic_xsmp_send_ack(xsmp_hndl, xmsgp)) {
+               XSMP_ERROR
+                   ("%s: xsvnic_xsmp_send_ack error name: %s, VID=0x%llx\n",
+                    __func__, xmsgp->vnic_name,
+                    be64_to_cpu(xmsgp->resource_id));
+       }
+       mutex_unlock(&xsvnicp->mutex);
+
+       return ret;
+}
+
+/*
+ * Called with global mutex held to protect xsvnic_list
+ */
+static void xsvnic_xsmp_sync_begin(xsmp_cookie_t xsmp_hndl, void *msg)
+{
+       struct xsvnic *xsvnicp;
+
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl)) {
+                       xsvnicp->xsmp_hndl = xsmp_hndl;
+                       /*
+                        * Do not handle SYNC_BEGIN here; sometimes a
+                        * bug on the IO director causes an unnecessary
+                        * delete
+                        */
+#if 0
+                       set_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state);
+#endif
+               }
+       }
+}
+
+static void xsvnic_update_oper_state(struct xsvnic *xsvnicp)
+{
+       if (xsvnicp->mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY))
+               xsvnic_xsmp_send_ha_state(xsvnicp, xsvnicp->ha_state);
+       xsvnic_xsmp_send_oper_state(xsvnicp, xsvnicp->resource_id,
+                                   test_bit(XSVNIC_OPER_UP, &xsvnicp->state) ?
+                                   XSMP_XSVNIC_OPER_UP :
+                                   XSMP_XSVNIC_OPER_DOWN);
+}
+
+/*
+ * Called with global mutex held to protect xsvnic_list
+ */
+static void xsvnic_xsmp_sync_end(xsmp_cookie_t xsmp_hndl)
+{
+       struct xsvnic *xsvnicp;
+       unsigned long flags;
+
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl)) {
+                       if (test_bit(XSVNIC_SYNC_DIRTY, &xsvnicp->state)) {
+                               pr_info("XSVNIC %s ", xsvnicp->vnic_name);
+                               pr_info("deleted due to sync end condition\n");
+                               xsvnic_counters[XSVNIC_SYNC_END_DEL_COUNTER]++;
+                               spin_lock_irqsave(&xsvnicp->lock, flags);
+                               set_bit(XSVNIC_DELETING, &xsvnicp->state);
+                               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+                       } else
+                               xsvnic_update_oper_state(xsvnicp);
+               }
+       }
+}
+
+/*
+ * We set the DELETING bit and let sm_work thread handle delete
+ */
+static void xsvnic_handle_del_message(xsmp_cookie_t xsmp_hndl,
+                                     struct xsvnic_xsmp_msg *xmsgp)
+{
+       struct xsvnic *xsvnicp;
+       unsigned long flags;
+
+       xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!xsvnicp) {
+               xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+               return;
+       }
+       spin_lock_irqsave(&xsvnicp->lock, flags);
+       set_bit(XSVNIC_DELETING, &xsvnicp->state);
+       spin_unlock_irqrestore(&xsvnicp->lock, flags);
+}
+
+static void xsvnic_send_cmd_to_xsigod(struct xsvnic *xsvnicp, int cmd)
+{
+       struct xsmp_message_header *xhdr;
+       struct xsvnic_xsmp_msg *xmsgp;
+       int tlen = sizeof(*xmsgp) + sizeof(*xhdr);
+
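+       /*
+        * On a successful send, ownership of the message buffer
+        * apparently passes to xsigod; it is freed here only when the
+        * send fails.
+        */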
+       xhdr = xcpm_alloc_msg(tlen);
+       if (!xhdr)
+               return;
+       memset(xhdr, 0, tlen);
+       xhdr->type = XSMP_MESSAGE_TYPE_VNIC;
+       xhdr->length = tlen;
+       xmsgp = (struct xsvnic_xsmp_msg *)(xhdr + 1);
+       xmsgp->type = cmd;
+       strcpy(xmsgp->vnic_name, xsvnicp->vnic_name);
+       xmsgp->resource_id = cpu_to_be64(xsvnicp->resource_id);
+       xmsgp->mp_flag = cpu_to_be16(xsvnicp->mp_flag);
+       xmsgp->code = 0;
+       xmsgp->length = cpu_to_be16(sizeof(*xmsgp));
+       if (xcpm_send_msg_xsigod(xsvnicp->xsmp_hndl, xhdr, tlen))
+               xcpm_free_msg(xhdr);
+}
+
+static void xsvnic_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+                                     int len)
+{
+       void *tmsg;
+
+       tmsg = xcpm_alloc_msg(len);
+       if (!tmsg)
+               return;
+       memcpy(tmsg, data, len);
+       if (xcpm_send_msg_xsigod(xsmp_hndl, tmsg, len))
+               xcpm_free_msg(tmsg);
+}
+
+static void xsvnic_handle_ip_req(xsmp_cookie_t xsmp_hndl, u8 *data, int len)
+{
+       struct xsvnic_xsmp_vlanip_msg *msgp =
+           (struct xsvnic_xsmp_vlanip_msg *)
+           (data + sizeof(struct xsmp_message_header));
+       struct xsvnic *xsvnicp;
+
+       XSMP_INFO("%s:XSMP message type VLAN IP\n", __func__);
+
+       xsvnicp = xsvnic_get_xsvnic_by_vid(be64_to_cpu(msgp->resource_id));
+       if (!xsvnicp) {
+               xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+               return;
+       }
+       strcpy(msgp->ifname, xsvnicp->vnic_name);
+       msgp->mp_flag = cpu_to_be16(xsvnicp->mp_flag);
+       /*
+        * Punt this message to userspace
+        */
+       xsvnic_send_msg_to_xsigod(xsmp_hndl, data, len);
+}
+
+static void xsvnic_process_iscsi_info(xsmp_cookie_t xsmp_hndl, u8 *data,
+                                     int len)
+{
+       struct xsvnic_iscsi_msg *iscsi_msg = (struct xsvnic_iscsi_msg *)
+           (data + sizeof(struct xsmp_message_header));
+       struct xsvnic_iscsi_info *isp;
+       struct xsvnic *xsvnicp;
+
+       XSMP_INFO("%s:XSMP message type iscsi info\n", __func__);
+       xsvnicp =
+           xsvnic_get_xsvnic_by_vid(be64_to_cpu(iscsi_msg->iscsi_info.vid));
+       if (!xsvnicp) {
+               xsvnic_counters[XSVNIC_VNIC_DEL_NOVID_COUNTER]++;
+               return;
+       }
+       /*
+        * Now copy over iSCSI information
+        */
+       isp = &xsvnicp->iscsi_boot_info;
+       isp->vid = be64_to_cpu(iscsi_msg->iscsi_info.vid);
+       isp->vlan_id = be16_to_cpu(iscsi_msg->iscsi_info.vlan_id);
+       isp->mac = be64_to_cpu(iscsi_msg->iscsi_info.mac);
+       isp->protocol = be16_to_cpu(iscsi_msg->iscsi_info.protocol);
+       isp->port = be16_to_cpu(iscsi_msg->iscsi_info.port);
+       isp->lun = be16_to_cpu(iscsi_msg->iscsi_info.lun);
+       isp->mount_type = be16_to_cpu(iscsi_msg->iscsi_info.mount_type);
+       isp->role = iscsi_msg->iscsi_info.role;
+       isp->ip_type = iscsi_msg->iscsi_info.ip_type;
+       isp->ip_addr = iscsi_msg->iscsi_info.ip_addr;
+       isp->netmask = iscsi_msg->iscsi_info.netmask;
+       isp->gateway_ip_address = iscsi_msg->iscsi_info.gateway_ip_address;
+       isp->dns_ip_address = iscsi_msg->iscsi_info.dns_ip_address;
+       isp->target_ip_address = iscsi_msg->iscsi_info.target_ip_address;
+       memcpy(isp->vnic_name, iscsi_msg->iscsi_info.vnic_name,
+              XSVNIC_MAX_NAME_SIZE);
+       memcpy(isp->domain_name, iscsi_msg->iscsi_info.domain_name,
+              MAX_DOMAIN_NAME_LEN);
+       memcpy(isp->target_iqn, iscsi_msg->iscsi_info.target_iqn,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->target_portal_group,
+              iscsi_msg->iscsi_info.target_portal_group,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->initiator_iqn, iscsi_msg->iscsi_info.initiator_iqn,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->mount_dev, iscsi_msg->iscsi_info.mount_dev,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->mount_options, iscsi_msg->iscsi_info.mount_options,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->vol_group, iscsi_msg->iscsi_info.vol_group,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+       memcpy(isp->vol_group_name, iscsi_msg->iscsi_info.vol_group_name,
+              ISCSI_MOUNT_DEV_NAME_LEN);
+}
+
+static void handle_xsvnic_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data,
+                                       int length)
+{
+       int hlen;
+       struct xsmp_message_header *header = (struct xsmp_message_header *)data;
+       struct xsvnic_xsmp_msg *xmsgp =
+           (struct xsvnic_xsmp_msg *)(data + sizeof(*header));
+
+       XSMP_FUNCTION("%s:\n", __func__);
+
+       if (length < sizeof(*header)) {
+               XSMP_ERROR("%s:XSMP message too short: act length: %d\n",
+                          __func__, length);
+               return;
+       }
+       hlen = be16_to_cpu(header->length);
+       if (hlen > length) {
+               XSMP_ERROR
+                   ("%s:XSMP header length greater than payload length %d\n",
+                    __func__, length);
+               return;
+       }
+       if (header->type != XSMP_MESSAGE_TYPE_VNIC) {
+               XSMP_ERROR("%s:XSMP message type not VNIC type: %d\n",
+                          __func__, header->type);
+               return;
+       }
+
+       XSMP_INFO("%s: XSMP message type: %d\n", __func__, xmsgp->type);
+
+       mutex_lock(&xsvnic_mutex);
+
+       switch (xmsgp->type) {
+       case XSMP_XSVNIC_VLANIP:
+               xsvnic_handle_ip_req(xsmp_hndl, data, length);
+               break;
+       case XSMP_XSVNIC_INFO_REQUEST:
+               break;
+       case XSMP_XSVNIC_INSTALL:
+               xsvnic_counters[XSVNIC_VNIC_INSTALL_COUNTER]++;
+               xsvnic_xsmp_install(xsmp_hndl, xmsgp, data, length);
+               break;
+       case XSMP_XSVNIC_DELETE:
+               xsvnic_handle_del_message(xsmp_hndl, xmsgp);
+               xsvnic_counters[XSVNIC_VNIC_DEL_COUNTER]++;
+               break;
+       case XSMP_XSVNIC_UPDATE:
+               xsvnic_counters[XSVNIC_VNIC_UPDATE_COUNTER]++;
+               xsvnic_xsmp_update(xsmp_hndl, xmsgp);
+               break;
+       case XSMP_XSVNIC_SYNC_BEGIN:
+               xsvnic_counters[XSVNIC_VNIC_SYNC_BEGIN_COUNTER]++;
+               xsvnic_xsmp_sync_begin(xsmp_hndl, xmsgp);
+               break;
+       case XSMP_XSVNIC_SYNC_END:
+               xsvnic_counters[XSVNIC_VNIC_SYNC_END_COUNTER]++;
+               xsvnic_xsmp_sync_end(xsmp_hndl);
+               break;
+       case XSMP_XSVNIC_OPER_REQ:
+               xsvnic_counters[XSVNIC_VNIC_OPER_REQ_COUNTER]++;
+               (void)xsvnic_xsmp_handle_oper_req(xsmp_hndl,
+                                                 be64_to_cpu(xmsgp->
+                                                             resource_id));
+               break;
+       case XSMP_XSVNIC_ISCSI_INFO:
+               xsvnic_counters[XSVNIC_ISCSI_INFO_COUNTER]++;
+               xsvnic_process_iscsi_info(xsmp_hndl, data, length);
+               break;
+       default:
+               xsvnic_counters[XSVNIC_VNIC_UNSUP_XSMP_COUNTER]++;
+               XSMP_ERROR("%s: Unsupported VNIX XSMP message: %d\n",
+                          __func__, xmsgp->type);
+               break;
+       }
+       mutex_unlock(&xsvnic_mutex);
+}
+
+static void handle_xsvnic_xsmp_messages_work(struct work_struct *work)
+{
+       struct xsvnic_work *xwork = container_of(work, struct xsvnic_work,
+                                                work);
+
+       (void)handle_xsvnic_xsmp_messages(xwork->xsmp_hndl, xwork->msg,
+                                         xwork->len);
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+/*
+ * Called from thread context
+ */
+static void xsvnic_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *msg,
+                                  int length)
+{
+       struct xsvnic_work *work;
+       unsigned long flags;
+
+       XSMP_FUNCTION("%s:\n", __func__);
+
+       work = kmalloc(sizeof(*work), GFP_KERNEL);
+       if (!work) {
+               XSMP_ERROR("%s: Out of memory\n", __func__);
+               kfree(msg);
+               return;
+       }
+       INIT_WORK(&work->work, handle_xsvnic_xsmp_messages_work);
+       work->xsmp_hndl = xsmp_hndl;
+       work->msg = msg;
+       work->len = length;
+       spin_lock_irqsave(&xsvnic_lock, flags);
+       /*
+        * TODO: validate the message and bump a counter here
+        */
+       queue_work(xsvnic_wq, &work->work);
+       spin_unlock_irqrestore(&xsvnic_lock, flags);
+}
+
+/*
+ * Needs to be called with xsvnic_mutex lock held
+ */
+static void xsvnic_wait_for_removal(xsmp_cookie_t xsmp_hndl)
+{
+       int is_pres;
+       struct xsvnic *xsvnicp;
+
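+       /*
+        * Drop the mutex while sleeping so the worker threads that
+        * actually delete the vnics can make progress.
+        */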
+       while (1) {
+               is_pres = 0;
+               list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+                       if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl))
+                               is_pres = 1;
+               }
+               if (is_pres) {
+                       mutex_unlock(&xsvnic_mutex);
+                       msleep(100);
+                       mutex_lock(&xsvnic_mutex);
+               } else
+                       break;
+       }
+}
+
+/*
+ * Called from thread context
+ */
+static void xsvnic_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+       struct xsvnic *xsvnicp;
+       unsigned long flags;
+
+       mutex_lock(&xsvnic_mutex);
+
+       switch (event) {
+       case XSCORE_PORT_UP:
+       case XSCORE_PORT_DOWN:
+               list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+                       if (xsmp_sessions_match(&xsvnicp->xsmp_info,
+                               xsmp_hndl)) {
+                               if (event == XSCORE_PORT_DOWN) {
+                                       set_bit(XSVNIC_IBLINK_DOWN,
+                                               &xsvnicp->state);
+                                       xsvnic_set_oper_down(xsvnicp, 1);
+                                       xsvnicp->counters
+                                           [XSVNIC_IBLINK_DOWN_COUNTER]++;
+                               } else {
+                                       clear_bit(XSVNIC_IBLINK_DOWN,
+                                                 &xsvnicp->state);
+                                       xsvnicp->counters
+                                           [XSVNIC_IBLINK_UP_COUNTER]++;
+                               }
+                       }
+               }
+               break;
+       case XSCORE_DEVICE_REMOVAL:
+               xsvnic_counters[XSVNIC_DEVICE_REMOVAL_COUNTER]++;
+               list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+                       if (xsmp_sessions_match(&xsvnicp->xsmp_info,
+                               xsmp_hndl)) {
+                               spin_lock_irqsave(&xsvnicp->lock, flags);
+                               set_bit(XSVNIC_DELETING, &xsvnicp->state);
+                               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+                       }
+               }
+               /*
+                * Now wait for all the vnics to be deleted
+                */
+               xsvnic_wait_for_removal(xsmp_hndl);
+               break;
+       case XSCORE_CONN_CONNECTED:
+               list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+                       if (xsmp_sessions_match(&xsvnicp->xsmp_info, xsmp_hndl))
+                               xsvnicp->xsmp_hndl = xsmp_hndl;
+               }
+               break;
+       default:
+               break;
+       }
+
+       mutex_unlock(&xsvnic_mutex);
+}
+
+static int xsvnic_xsmp_callout_handler(char *name)
+{
+       struct xsvnic *xsvnicp;
+       int ret = 0;
+
+       mutex_lock(&xsvnic_mutex);
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               /* CHECK for duplicate name */
+               if (strcmp(xsvnicp->vnic_name, name) == 0) {
+                       ret = -EINVAL;
+                       break;
+               }
+       }
+       mutex_unlock(&xsvnic_mutex);
+       return ret;
+}
+
+int xsvnic_xsmp_init(void)
+{
+       struct xsmp_service_reg_info service_info = {
+               .receive_handler = xsvnic_receive_handler,
+               .event_handler = xsvnic_xsmp_event_handler,
+               .callout_handler = xsvnic_xsmp_callout_handler,
+               .ctrl_message_type = XSMP_MESSAGE_TYPE_VNIC,
+               .resource_flag_index = RESOURCE_FLAG_INDEX_VNIC
+       };
+
+       struct xsmp_service_reg_info service_info_ha = {
+               .ctrl_message_type = XSMP_MESSAGE_TYPE_SESSION
+       };
+
+       xsvnic_xsmp_service_id = xcpm_register_service(&service_info);
+       if (xsvnic_xsmp_service_id < 0)
+               return xsvnic_xsmp_service_id;
+
+       if (!xsvnic_havnic) {
+               service_info_ha.resource_flag_index = RESOURCE_FLAG_INDEX_NO_HA;
+               xsigo_session_service_id =
+                   xcpm_register_service(&service_info_ha);
+               if (xsigo_session_service_id < 0)
+                       return xsigo_session_service_id;
+       }
+
+       return 0;
+}
+
+void xsvnic_xsmp_exit(void)
+{
+       (void)xcpm_unregister_service(xsvnic_xsmp_service_id);
+       xsvnic_xsmp_service_id = -1;
+       if (!xsvnic_havnic) {
+               (void)xcpm_unregister_service(xsigo_session_service_id);
+               xsigo_session_service_id = -1;
+       }
+}
+
+int xsvnic_wait_for_first(void)
+{
+       int secs = xsvnic_wait_per_vnic;
+
+       /* Total wait is xsvnic_wait_per_vnic seconds */
+       mutex_lock(&xsvnic_mutex);
+       DRV_INFO("%s: Checking for the first VNIC to be up\n", __func__);
+       while (list_empty(&xsvnic_list) && secs) {
+               mutex_unlock(&xsvnic_mutex);
+               msleep(1000);
+               secs--;
+               mutex_lock(&xsvnic_mutex);
+       }
+       mutex_unlock(&xsvnic_mutex);
+       DRV_INFO("%s: Finished Waiting for first Vnic to be up\n", __func__);
+       return secs > 0;
+}
+
+int xsvnic_all_up(void)
+{
+       int allup = 1;
+       struct xsvnic *xsvnicp;
+
+       mutex_lock(&xsvnic_mutex);
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (!test_bit(XSVNIC_OPER_UP, &xsvnicp->state))
+                       allup = 0;
+       }
+       mutex_unlock(&xsvnic_mutex);
+       return allup;
+}
+
+static int xsvnic_wait_for_all_vnics_up(void)
+{
+       int time, delayms = 200;
+
+       /* Total wait is xsvnic_wait_per_vnic seconds */
+       DRV_INFO("%s: Checking for VNICs to be up\n", __func__);
+       for (time = 0; time < xsvnic_wait_per_vnic * 1000; time += delayms) {
+               if (xsvnic_all_up()) {
+                       DRV_INFO("%s: VNICs are up\n", __func__);
+                       return 1;
+               }
+               msleep(delayms);
+       }
+       DRV_INFO("%s: VNICs are not up\n", __func__);
+       return 0;
+}
+
+static void xsvnic_wait_for_vnics(void)
+{
+       unsigned long wait_time = jiffies;
+
+       if (xsvnic_wait_in_boot && xscore_wait_for_sessions(0)) {
+               pr_info("XSVNIC: Waiting for VNIC's to come up .....\n");
+               if (xsvnic_wait_for_first())
+                       xsvnic_wait_for_all_vnics_up();
+               else
+                       DRV_INFO("%s: No VNIC's present\n", __func__);
+       }
+       xsvnic_wait_time = jiffies - wait_time;
+}
+
+/*
+ * Module initialization entry point
+ */
+
+static int __init xsvnic_init(void)
+{
+       int ret;
+
+       DRV_FUNCTION("%s\n", __func__);
+
+       spin_lock_init(&xsvnic_lock);
+       INIT_LIST_HEAD(&xsvnic_list);
+       mutex_init(&xsvnic_mutex);
+       xsvnic_wq = create_singlethread_workqueue("xsv_wq");
+       if (!xsvnic_wq) {
+               DRV_ERROR("%s: create_singlethread_workqueue failed\n",
+                         __func__);
+               return -ENOMEM;
+       }
+       xsvnic_io_wq = create_singlethread_workqueue("xsviowq");
+       if (!xsvnic_io_wq) {
+               DRV_ERROR("%s: create_singlethread_workqueue failed\n",
+                         __func__);
+               ret = -ENOMEM;
+               goto io_wq_error;
+       }
+       ret = xsvnic_create_procfs_root_entries();
+       if (ret) {
+               DRV_ERROR("%s: xsvnic_create_procfs_root_entries failed %d\n",
+                         __func__, ret);
+               goto proc_error;
+       }
+       ret = xsvnic_xsmp_init();
+       if (ret) {
+               DRV_ERROR("%s: xsvnic_xsmp_init failed %d\n", __func__, ret);
+               goto xsmp_err;
+       }
+       /* Wait for VNICs to come up */
+       xsvnic_wait_for_vnics();
+       return ret;
+
+xsmp_err:
+       xsvnic_remove_procfs_root_entries();
+proc_error:
+       destroy_workqueue(xsvnic_io_wq);
+io_wq_error:
+       destroy_workqueue(xsvnic_wq);
+       return ret;
+}
+
+static void __exit xsvnic_exit(void)
+{
+       struct xsvnic *xsvnicp;
+       unsigned long flags;
+
+       DRV_FUNCTION("%s\n", __func__);
+       xsvnic_xsmp_exit();
+       mutex_lock(&xsvnic_mutex);
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               spin_lock_irqsave(&xsvnicp->lock, flags);
+               set_bit(XSVNIC_DELETING, &xsvnicp->state);
+               set_bit(XSVNIC_SHUTDOWN, &xsvnicp->state);
+               spin_unlock_irqrestore(&xsvnicp->lock, flags);
+       }
+       while (!list_empty(&xsvnic_list)) {
+               mutex_unlock(&xsvnic_mutex);
+               msleep(100);
+               mutex_lock(&xsvnic_mutex);
+       }
+       mutex_unlock(&xsvnic_mutex);
+       flush_workqueue(xsvnic_wq);
+       destroy_workqueue(xsvnic_wq);
+       flush_workqueue(xsvnic_io_wq);
+       destroy_workqueue(xsvnic_io_wq);
+       xsvnic_remove_procfs_root_entries();
+}
+
+int xsvnic_iscsi_present(void)
+{
+       int pres = 0;
+       struct xsvnic *xsvnicp;
+
+       mutex_lock(&xsvnic_mutex);
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (xsvnicp->iscsi_boot_info.initiator_iqn[0] != '\0')
+                       pres = 1;
+       }
+       mutex_unlock(&xsvnic_mutex);
+       return pres;
+}
+EXPORT_SYMBOL(xsvnic_iscsi_present);
+
+int xsvnic_get_all_names(char **names, int max)
+{
+       struct xsvnic *xsvnicp;
+       int count = 0;
+
+       mutex_lock(&xsvnic_mutex);
+       list_for_each_entry(xsvnicp, &xsvnic_list, xsvnic_list) {
+               if (count < max)
+                       names[count++] =
+                           kstrdup(xsvnicp->vnic_name, GFP_KERNEL);
+       }
+       mutex_unlock(&xsvnic_mutex);
+       return count;
+}
+EXPORT_SYMBOL(xsvnic_get_all_names);
+
+module_init(xsvnic_init);
+module_exit(xsvnic_exit);
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_stats.c b/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_stats.c
new file mode 100644 (file)
index 0000000..05c1aee
--- /dev/null
@@ -0,0 +1,841 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/fs.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/moduleparam.h>
+
+#include "xsvnic.h"
+
+static int xs_seq_file;
+module_param(xs_seq_file, int, 0644);
+
+MODULE_PARM_DESC(xs_seq_file,
+                "Use the seq_file interface to print large data sets in /proc entries");
+
+static char *glob_counter_name[XSVNIC_MAX_GLOB_COUNTERS] = {
+       "sync end del count:\t\t",
+       "vnic install count:\t\t",
+       "vnic del count:\t\t\t",
+       "vnic del novid count:\t\t",
+       "vnic update count:\t\t",
+       "vnic sync begin count:\t\t",
+       "vnic sync end count:\t\t",
+       "vnic oper req count:\t\t",
+       "vnic unsup cmd count:\t\t",
+       "iscsi info count:\t\t",
+       "xscore device remove count:\t",
+};
+
+static char *counter_name[XSVNIC_MAX_COUNTERS] = {
+       "ctrl_heartbeat_count:\t\t",
+       "data_heartbeat_count:\t\t",
+       "hbeat send error count:\t\t",
+       "napi_poll_count:\t\t",
+       "short_tx_pkt_count:\t\t",
+       "tx_skb_count:\t\t\t",
+       "tx_skb_tso_count:\t\t",
+       "tx_skb_noheadroom_count:\t",
+       "tx skb free count:\t\t",
+       "tx skb free count (reaped):\t",
+       "tx head expand count:\t\t",
+       "tx head expand error count:\t",
+       "tx vlan count:\t\t\t",
+       "tx error count:\t\t\t",
+       "tx wrb exhaust:\t\t\t",
+       "tx drop oper down count:\t",
+       "tx drop skb error count:\t",
+       "tx skb expand error count:\t",
+       "tx drop ring full count:\t",
+       "rx_skb_count:\t\t\t",
+       "rx_skb_alloc_count:\t\t",
+       "rx_skb_sendtovlangrp:\t\t",
+       "rx_skb_batched_count:\t\t",
+       "rx_skb_freed_count:\t\t",
+       "rx_bat_maxsegs_count:\t\t",
+       "rx_bat_numsegs_below_5:\t\t",
+       "rx_bat_numsegs_between_5_10:\t",
+       "rx_bat_numsegs_between_10_20:\t",
+       "rx_bat_numsegs_above_20:\t",
+       "rx_bat_8k_segs_count:\t\t",
+       "rx skb offload count:\t\t",
+       "rx skb offl frag count:\t\t",
+       "rx skb offlnonipv4 count:\t",
+       "rx error count:\t\t\t",
+       "rx quota exceeded count:\t",
+       "rx no buf count:\t\t",
+       "rx max packet:\t\t\t",
+       "rx min packet:\t\t\t",
+       "rx lro Aggregated Packet count:\t",
+       "rx lro Flushed count:\t\t",
+       "rx lro Average Aggregated Count:\t",
+       "rx lro No Descriptor Count:\t",
+       "tx max packet:\t\t\t",
+       "tx min packet:\t\t\t",
+       "tx max time spent:\t\t",
+       "tx min time spent:\t\t",
+       "napi sched count:\t\t",
+       "napi notsched count:\t\t",
+       "io port up count:\t\t",
+       "io port down count:\t\t",
+       "io dup port up count:\t\t",
+       "io dup port down count:\t\t",
+       "start rx sent count:\t\t",
+       "stop rx sent count:\t\t",
+       "start rx resp count:\t\t",
+       "rx bad resp count:\t\t",
+       "open count:\t\t\t",
+       "stop count:\t\t\t",
+       "getstats count:\t\t\t",
+       "set mcast count:\t\t",
+       "multicast resp count:\t\t",
+       "multicast no resp count:\t",
+       "vlan add count:\t\t\t",
+       "vlan del count:\t\t\t",
+       "ioctl count:\t\t\t",
+       "mac addr change:\t\t",
+       "wdog timeout count:\t\t",
+       "oper req count:\t\t\t",
+       "xt down count:\t\t\t",
+       "xt update count:\t\t",
+       "xt lid change  count:\t\t",
+       "admin up count:\t\t\t",
+       "admin down count:\t\t",
+       "sm poll count:\t\t\t",
+       "qp error count:\t\t\t",
+       "IB recovery count:\t\t",
+       "IB recovered count:\t\t",
+       "IB link down count:\t\t",
+       "IB link up count:\t\t",
+       "ctrl conn ok count:\t\t",
+       "ctrl rdisc count:\t\t",
+       "ctrl conn err count:\t\t",
+       "ctrl recv err count:\t\t",
+       "data conn ok count:\t\t",
+       "data rdisc count:\t\t",
+       "data conn err count:\t\t",
+       "sent oper up count:\t\t",
+       "sent oper down count:\t\t",
+       "sent oper state failure count:\t",
+       "sent oper state success count:\t",
+       "drop rx standby count:\t\t",
+       "drop tx standby count:\t\t",
+};
+
+#define atoi(str)       simple_strtoul((((str) != NULL) ? (str) : ""), NULL, 0)
+#define XS_RESCHED_NAPI        "napi_sched"
+#define XS_READIB_BUF  "read_ibbuf"
+#define XS_RXBATCHING_ON       "rbatch on"
+#define XS_RXBATCHING_OFF      "rbatch off"
+#define XS_SLAVE_ACTIVE                "slave active"
+#define XS_SLAVE_BACKUP                "slave backup"
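+
+/*
+ * Example usage of the commands above, echoed into a device's /proc
+ * file (hypothetical VNIC named "vnic1"):
+ *
+ *     echo 0            > /proc/driver/xsvnic/devices/vnic1/vnic1  # clear counters
+ *     echo napi_sched   > /proc/driver/xsvnic/devices/vnic1/vnic1  # force a NAPI poll
+ *     echo "rbatch on"  > /proc/driver/xsvnic/devices/vnic1/vnic1  # RX batching on
+ *     echo "rbatch off" > /proc/driver/xsvnic/devices/vnic1/vnic1  # RX batching off
+ */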
+
+struct proc_dir_entry *proc_root_xsvnic;
+struct proc_dir_entry *proc_root_xsvnic_dev;
+struct proc_dir_entry *iscsi_boot;
+
+static ssize_t xsvnic_proc_write_debug(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp);
+static int xsvnic_proc_read_debug(struct seq_file *m, void *data);
+static int xsvnic_proc_open_debug(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_iscsi_boot(struct file *file,
+                                           const char __user *buffer,
+                                           size_t count, loff_t *offp);
+static int xsvnic_proc_read_iscsi_boot(struct seq_file *m, void *data);
+static int xsvnic_proc_open_iscsi_boot(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_device(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp);
+static int xsvnic_proc_read_device(struct seq_file *m, void *data);
+static int xsvnic_proc_open_device(struct inode *inode, struct file *file);
+static ssize_t xsvnic_proc_write_device_counters(struct file *file,
+                                                const char __user *buffer,
+                                                size_t count, loff_t *offp);
+static int xsvnic_proc_read_device_counters(struct seq_file *m, void *data);
+static int xsvnic_proc_open_device_counters(struct inode *inode,
+                                           struct file *file);
+static void *xsvnic_seq_start(struct seq_file *seq, loff_t *pos);
+static void *xsvnic_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+static int xsvnic_seq_show(struct seq_file *seq, void *v);
+static void xsvnic_seq_stop(struct seq_file *seq, void *v);
+static int xsvnic_open(struct inode *inode, struct file *file);
+
+static const struct file_operations xsvnic_debug_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xsvnic_proc_open_debug,
+       .read = seq_read,
+       .write = xsvnic_proc_write_debug,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xsvnic_iscsi_boot_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xsvnic_proc_open_iscsi_boot,
+       .read = seq_read,
+       .write = xsvnic_proc_write_iscsi_boot,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xsvnic_device_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xsvnic_proc_open_device,
+       .read = seq_read,
+       .write = xsvnic_proc_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xsvnic_device_counters_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xsvnic_proc_open_device_counters,
+       .read = seq_read,
+       .write = xsvnic_proc_write_device_counters,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct seq_operations xsvnic_seq_ops = {
+       .start = xsvnic_seq_start,
+       .next = xsvnic_seq_next,
+       .stop = xsvnic_seq_stop,
+       .show = xsvnic_seq_show
+};
+
+static const struct file_operations xsvnic_file_ops = {
+       .owner = THIS_MODULE,
+       .open = xsvnic_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release
+};
+
+static int xsvnic_proc_read_device(struct seq_file *m, void *data)
+{
+       struct xsvnic *vp = m->private;
+       unsigned long tsecs = 0, tmins = 0, thrs = 0;
+       char tmp_buf[512];
+
+       seq_printf(m, "Admin state:\t\t\t%s\n",
+                  test_bit(XSVNIC_CHASSIS_ADMIN_UP,
+                           &vp->state) ? "Up" : "Down");
+       seq_printf(m, "Chassis Name:\t\t\t%s\n", vp->xsmp_info.chassis_name);
+       seq_printf(m, "Chassis Version:\t\t%x\n", vp->xsmp_info.version);
+       seq_printf(m, "Server-Profile Name:\t\t%s\n",
+                  vp->xsmp_info.session_name);
+       seq_puts(m, "Config parameters:\n");
+       seq_printf(m, "TCA GUID:\t\t\t0x%Lx\n", vp->tca_guid);
+       seq_printf(m, "TCA lid:\t\t\t0x%x\n", vp->tca_lid);
+       seq_printf(m, "MAC addr:\t\t\t0x%Lx\n", vp->mac);
+       seq_printf(m, "VID:\t\t\t\t0x%Lx\n", vp->resource_id);
+       seq_printf(m, "mtu:\t\t\t\t%d\n", vp->mtu);
+       seq_printf(m, "ring size:\t\t\t%d\n", vp->rx_ring_size);
+       seq_printf(m, "bandwidth:\t\t\t%d\n", vp->bandwidth);
+       seq_puts(m, "\n");
+       seq_printf(m, "link/xsmp hndl:\t\t\t%p\n", vp->xsmp_hndl);
+       seq_printf(m, "Port link state: \t\t%s\n",
+                  test_bit(XSVNIC_PORT_LINK_UP, &vp->state) ? "Up" : "Down");
+       seq_printf(m, "Port link speed: \t\t%d Mbps\n", vp->port_speed);
+
+       strcpy(tmp_buf, "None");
+       if (vp->mp_flag & MP_XSVNIC_PRIMARY) {
+               strcpy(tmp_buf, "Primary");
+               if (vp->mp_flag & MP_XSVNIC_AUTO_SWITCH)
+                       strcat(tmp_buf, " + AutoSwitchover");
+       } else if (vp->mp_flag & MP_XSVNIC_SECONDARY) {
+               strcpy(tmp_buf, "Secondary");
+               if (vp->mp_flag & MP_XSVNIC_AUTO_SWITCH)
+                       strcat(tmp_buf, " + AutoSwitchover");
+       }
+
+       seq_printf(m, "HA flags:\t\t\t%s\n", tmp_buf);
+
+       seq_printf(m, "netdev features:\t\t0x%x\n", (u32) vp->netdev->features);
+
+       seq_printf(m, "Checksum offload:\t\t%s\n",
+                  (vp->install_flag &
+                   (XSVNIC_INSTALL_TCP_OFFL | XSVNIC_INSTALL_UDP_OFFL))
+                  ? "Enabled" : "Disabled");
+
+       seq_printf(m, "TSO:\t\t\t\t%s\n",
+                  (vp->netdev->
+                   features & NETIF_F_TSO) ? "Enabled" : "Disabled");
+
+       seq_printf(m, "LRO:\t\t\t\t%s\n",
+                  (vp->lro_mode) ? "Enabled" : "Disabled");
+
+       seq_printf(m, "RX batching :\t\t\t%s\n",
+                  (vp->is_rxbatching) ? "Enabled" : "Disabled");
+
+       seq_printf(m, "8k IB mtu :\t\t\t%s\n",
+                  ((vp->install_flag & XSVNIC_8K_IBMTU)
+                   && vp->xsmp_info.is_shca)
+                  ? "Enabled" : "Disabled");
+       seq_printf(m, "VLAN offload :\t\t\t%s\n",
+                  (xsvnic_vlanaccel != 0) ? "Enabled" : "Disabled");
+       seq_printf(m, "vlan count:\t\t\t%d\n", vp->vlan_count);
+       seq_printf(m, "mcast count:\t\t\t%d (promisc: %s)\n",
+                  vp->mc_count, vp->iff_promisc ? "on" : "off");
+
+       seq_printf(m,
+                  "Data Connection:\t\t%s (%d), Mode: %s InterruptMode for TX: %s RX: %s\n",
+                  vp->data_conn.state ==
+                  XSVNIC_CONN_CONNECTED ? "Connected" : "Not connected",
+                  vp->data_conn.state,
+                  vp->data_conn.ctx.
+                  features & XSCORE_USE_CHECKSUM ? "Checksum" : "ICRC",
+                  vp->data_conn.ctx.
+                  features & XSCORE_NO_SEND_COMPL_INTR ? "Disabled" :
+                  "Enabled",
+                  vp->data_conn.ctx.
+                  features & XSCORE_NO_RECV_COMPL_INTR ? "Disabled" :
+                  "Enabled");
+
+       seq_printf(m, "Control Connection:\t\t%s (%d), Mode: %s\n",
+                  vp->ctrl_conn.state == XSVNIC_CONN_CONNECTED ?
+                  "Connected" : "Not connected", vp->ctrl_conn.state,
+                  vp->ctrl_conn.ctx.
+                  features & XSCORE_USE_CHECKSUM ? "Checksum" : "ICRC");
+       seq_puts(m, "Interrupt Coalescing parameters\n");
+       seq_printf(m, "TX:\t\t\t\t MaxUSeconds: %d MaxFrames: %d\n",
+                  vp->data_conn.ctx.tx_coalesce_usecs,
+                  vp->data_conn.ctx.tx_max_coalesced_frames);
+       seq_printf(m, "RX:\t\t\t\t MaxUSeconds: %d MaxFrames: %d\n",
+                  vp->data_conn.ctx.rx_coalesce_usecs,
+                  vp->data_conn.ctx.rx_max_coalesced_frames);
+
+       if (vp->data_conn.state == XSVNIC_CONN_CONNECTED &&
+           vp->ctrl_conn.state == XSVNIC_CONN_CONNECTED) {
+               int lqpn, dqpn;
+
+               tsecs = jiffies_to_msecs(jiffies - vp->jiffies) / 1000;
+               thrs = tsecs / (60 * 60);
+               tmins = (tsecs / 60 - (thrs * 60));
+               tsecs = tsecs - (tmins * 60) - (thrs * 60 * 60);
+
+               lqpn = vp->ctrl_conn.ctx.local_qpn;
+               dqpn = vp->ctrl_conn.ctx.remote_qpn;
+               seq_printf(m,
+                          "Ctrl QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+                          lqpn, lqpn, dqpn, dqpn);
+
+               lqpn = vp->data_conn.ctx.local_qpn;
+               dqpn = vp->data_conn.ctx.remote_qpn;
+               seq_printf(m,
+                          "Data QP end points:\t\t(0x%x, %d) : (0x%x, %d)\n",
+                          lqpn, lqpn, dqpn, dqpn);
+       }
+       seq_printf(m, "XSVNIC Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+                  thrs, tmins, tsecs);
+       seq_puts(m, "\n");
+
+       seq_puts(m, "Operational state:\n");
+       if (vp->mp_flag & (MP_XSVNIC_PRIMARY | MP_XSVNIC_SECONDARY)) {
+               seq_printf(m, "HA VNIC state:\t\t\t%s\n",
+                          vp->ha_state ==
+                          XSVNIC_HA_STATE_STANDBY ? "Standby" : "Active");
+               seq_printf(m, "HA Active State:\t\t%s\n",
+                          test_bit(XSVNIC_STATE_STDBY,
+                                   &vp->
+                                   state) ? XS_SLAVE_BACKUP : XS_SLAVE_ACTIVE);
+       }
+
+       seq_printf(m, "Netdev state:\t\t\t0x%lu\n", vp->netdev->state);
+       seq_printf(m, "Netdev napi state:\t\t0x%lu\n", vp->napi.state);
+
+       tmp_buf[0] = 0;
+       if (netif_running(vp->netdev))
+               strcat(tmp_buf, "netdev running");
+       else
+               strcat(tmp_buf, "netif not running");
+       if (netif_queue_stopped(vp->netdev))
+               strcat(tmp_buf, " + netdev stopped");
+       else
+               strcat(tmp_buf, " + netdev not stopped");
+
+       seq_printf(m, "%s\n\n", tmp_buf);
+
+       seq_printf(m, "Carrier state:\t\t\t%s\n",
+                  netif_carrier_ok(vp->netdev) ? "Up" : "Down");
+
+       seq_printf(m, "VNIC up:\t\t\t%s\n",
+                  test_bit(XSVNIC_OPER_UP, &vp->state) ? "Yes" : "No");
+
+       seq_printf(m, "VNIC state:\t\t\t0x%x\n", (unsigned int)vp->state);
+       tmp_buf[0] = 0;
+       if (test_bit(XSVNIC_OPER_UP, &vp->state))
+               strcat(tmp_buf, "Oper Up");
+       else
+               strcat(tmp_buf, "Oper Down");
+       if (test_bit(XSVNIC_OS_ADMIN_UP, &vp->state))
+               strcat(tmp_buf, " + OS Admin Up");
+       else
+               strcat(tmp_buf, " + OS Admin Down");
+       if (test_bit(XSVNIC_CHASSIS_ADMIN_UP, &vp->state))
+               strcat(tmp_buf, " + Chassis Admin Up");
+       else
+               strcat(tmp_buf, " + Chassis Admin Down");
+       if (test_bit(XSVNIC_PORT_LINK_UP, &vp->state))
+               strcat(tmp_buf, " + Port Link Up");
+       else
+               strcat(tmp_buf, " + Port Link Down");
+       if (test_bit(XSVNIC_START_RX_SENT, &vp->state))
+               strcat(tmp_buf, " + Start Rx Sent");
+       else
+               strcat(tmp_buf, " + No Start Rx");
+       if (test_bit(XSVNIC_START_RESP_RCVD, &vp->state))
+               strcat(tmp_buf, " + Start Rx Resp Rcvd");
+       else
+               strcat(tmp_buf, " + No Start Rx Resp");
+
+       if (test_bit(XSVNIC_INTR_ENABLED, &vp->state))
+               strcat(tmp_buf, " + Rx Intr Enabled");
+       else
+               strcat(tmp_buf, " + Rx Intr Disabled");
+
+       if (test_bit(XSVNIC_RX_NOBUF, &vp->state))
+               strcat(tmp_buf, " + Rx No Buf");
+
+       if (test_bit(XSVNIC_XT_DOWN, &vp->state))
+               strcat(tmp_buf, " + XT Down");
+
+       if (test_bit(XSVNIC_IBLINK_DOWN, &vp->state))
+               strcat(tmp_buf, " +  IB Link Down");
+
+       if (test_bit(XSVNIC_OVER_QUOTA, &vp->state))
+               strcat(tmp_buf, " +  No RX Quota");
+
+       seq_printf(m, "%s\n\n", tmp_buf);
+
+       /* Get LRO statistics */
+       if (vp->lro_mode) {
+               vp->counters[XSVNIC_RX_LRO_AGGR_PKTS] +=
+                   vp->lro.lro_mgr.stats.aggregated;
+               vp->counters[XSVNIC_RX_LRO_FLUSHED_PKT] +=
+                   vp->lro.lro_mgr.stats.flushed;
+               if (vp->lro.lro_mgr.stats.flushed)
+                       vp->counters[XSVNIC_RX_LRO_AVG_AGGR_PKTS] +=
+                           vp->lro.lro_mgr.stats.aggregated /
+                           vp->lro.lro_mgr.stats.flushed;
+               else
+                       vp->counters[XSVNIC_RX_LRO_AVG_AGGR_PKTS] = 0;
+               vp->counters[XSVNIC_RX_LRO_NO_DESCRIPTORS] +=
+                   vp->lro.lro_mgr.stats.no_desc;
+       }
+
+       seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+       return 0;
+}
+
+static ssize_t xsvnic_proc_write_device(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp)
+{
+       struct xsvnic *vp = PDE_DATA(file_inode(file));
+       int ret;
+       char action[64];
+       size_t len = min(count, sizeof(action) - 1);
+
+       /*
+        * "buffer" is a user-space pointer, so copy it in before parsing.
+        * A plain copy (instead of sscanf) also preserves commands that
+        * contain spaces, such as "rbatch on".
+        */
+       memset(action, 0, sizeof(action));
+       if (copy_from_user(action, buffer, len))
+               return -EFAULT;
+       if (len && action[len - 1] == '\n')
+               action[len - 1] = '\0';
+
+       if ((strlen(action) == 1) && (atoi(action) == 0)) {
+               /* Clear counters */
+               memset(vp->counters, 0, sizeof(vp->counters));
+               vp->counters_cleared++;
+               return count;
+       }
+
+       if (strcmp(action, XS_RESCHED_NAPI) == 0)
+               set_bit(XSVNIC_TRIGGER_NAPI_SCHED, &vp->state);
+       else if (strcmp(action, XS_READIB_BUF) == 0) {
+               struct xscore_buf_info binfo;
+               struct xscore_conn_ctx *ctx = &vp->data_conn.ctx;
+
+               ret = xscore_read_buf(ctx, &binfo);
+               if (ret != 1 || binfo.status) {
+                       pr_info("xsvnic: %s No data found, status %d\n",
+                               vp->vnic_name, binfo.status);
+               } else {
+                       /* One pr_info per message; split calls each start
+                        * a new log line */
+                       pr_info("xsvnic: %s Data found, status %d, length %d\n",
+                               vp->vnic_name, binfo.status, binfo.sz);
+                       dev_kfree_skb_any(binfo.cookie);
+               }
+       } else if (strncmp(action, XS_RXBATCHING_ON, 9) == 0) {
+               ret = xsvnic_change_rxbatch(vp, 1);
+               if (ret != 1)
+                       pr_info("xsvnic: %s Cannot turn on rx batching %x\n",
+                              vp->vnic_name, ret);
+       } else if (strcmp(action, XS_RXBATCHING_OFF) == 0) {
+               ret = xsvnic_change_rxbatch(vp, 0);
+               if (ret != 1)
+                       pr_info("xsvnic: %s Cannot turn off rx batching %x\n",
+                              vp->vnic_name, ret);
+       } else if (strcmp(action, XS_SLAVE_ACTIVE) == 0) {
+               pr_info("%s XSVNIC[%s] Setting as active slave\n", __func__,
+                      vp->vnic_name);
+               clear_bit(XSVNIC_STATE_STDBY, &vp->state);
+       } else if (strcmp(action, XS_SLAVE_BACKUP) == 0) {
+               pr_info("%s XSVNIC[%s] Setting as standby slave\n",
+                      __func__, vp->vnic_name);
+               set_bit(XSVNIC_STATE_STDBY, &vp->state);
+       } else {
+               pr_info("xsvnic: %s  echo'ing %s is not valid\n",
+                       vp->vnic_name, action);
+       }
+
+       return count;
+}
+
+static int xsvnic_proc_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, xsvnic_proc_read_device,
+                          PDE_DATA(file_inode(file)));
+}
+
+static int xsvnic_proc_read_device_counters(struct seq_file *m, void *data)
+{
+       struct xsvnic *vp = m->private;
+       int i;
+
+       for (i = 0; i < XSVNIC_MAX_COUNTERS; i++)
+               seq_printf(m, "%s%u\n", counter_name[i], vp->counters[i]);
+       seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+       return 0;
+}
+
+static ssize_t xsvnic_proc_write_device_counters(struct file *file,
+                                                const char __user *buffer,
+                                                size_t count, loff_t *offp)
+{
+       struct xsvnic *vp = PDE_DATA(file_inode(file));
+       char action[64];
+       size_t len = min(count, sizeof(action) - 1);
+
+       /* Copy from the user-space buffer before parsing */
+       memset(action, 0, sizeof(action));
+       if (copy_from_user(action, buffer, len))
+               return -EFAULT;
+       if (len && action[len - 1] == '\n')
+               action[len - 1] = '\0';
+       if ((strlen(action) == 1) && (atoi(action) == 0)) {
+               /* Clear counters */
+               memset(vp->counters, 0, sizeof(vp->counters));
+               vp->counters_cleared++;
+       }
+       return count;
+}
+
+static int xsvnic_proc_open_device_counters(struct inode *inode,
+                                           struct file *file)
+{
+       return single_open(file, xsvnic_proc_read_device_counters,
+                          PDE_DATA(inode));
+}
+
+static void *xsvnic_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return (*pos < XSVNIC_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static void *xsvnic_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       ++*pos;
+       return (*pos < XSVNIC_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static int xsvnic_seq_show(struct seq_file *seq, void *v)
+{
+       struct xsvnic *vp = seq->private;
+
+       if (vp->ix == XSVNIC_MAX_COUNTERS)
+               vp->ix = 0;
+
+       seq_printf(seq, "%s %u\n", counter_name[vp->ix], vp->counters[vp->ix]);
+       vp->ix++;
+
+       return 0;
+}
+
+static void xsvnic_seq_stop(struct seq_file *seq, void *v)
+{
+       /* Nothing to be done here */
+}
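+
+/*
+ * Note on the iterator above: the seq_file core drives reads as
+ * start(*pos) -> show() -> next(*pos) -> show() ... -> stop(), resuming
+ * from the saved *pos whenever the user buffer fills; this is what the
+ * xs_seq_file module parameter selects for printing large counter sets.
+ */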
+
+static int xsvnic_open(struct inode *inode, struct file *sfile)
+{
+       struct seq_file *seq;
+       int ret_val;
+
+       ret_val = seq_open(sfile, &xsvnic_seq_ops);
+       if (!ret_val) {
+               /* recover the pointer buried in proc_dir_entry data */
+               seq = sfile->private_data;
+               seq->private = PDE_DATA(inode);
+       }
+
+       return ret_val;
+}
+
+int xsvnic_add_proc_entry(struct xsvnic *vp)
+{
+       struct proc_dir_entry *file, *counter;
+
+       vp->vnic_dir = proc_mkdir(vp->vnic_name, proc_root_xsvnic_dev);
+       if (!vp->vnic_dir) {
+               pr_info("Unable to create the xsvnic /proc directory\n");
+               return -ENOMEM;
+       }
+
+       file = proc_create_data(vp->vnic_name, S_IFREG, vp->vnic_dir,
+                               &xsvnic_device_proc_fops, vp);
+       if (!file) {
+               pr_info("Unable to create the xsvnic /proc entry\n");
+               return -ENOMEM;
+       }
+       if (xs_seq_file) {
+               /* Using seq_file for OVM */
+               counter = proc_create_data("counters", S_IFREG, vp->vnic_dir,
+                               &xsvnic_file_ops, vp);
+       } else {
+               counter = proc_create_data("counters", S_IFREG, vp->vnic_dir,
+                               &xsvnic_device_counters_proc_fops, vp);
+       }
+
+       if (!counter) {
+               pr_info("Unable to create the xsvnic /proc entry\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+void xsvnic_remove_proc_entry(struct xsvnic *vp)
+{
+       remove_proc_entry(vp->vnic_name, vp->vnic_dir);
+       remove_proc_entry("counters", vp->vnic_dir);
+       remove_proc_entry(vp->vnic_name, proc_root_xsvnic_dev);
+}
+
+static ssize_t xsvnic_proc_write_debug(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp)
+{
+       int newval;
+       ssize_t ret;
+       char *buf = (char *)__get_free_page(GFP_USER);
+
+       if (!buf)
+               return -ENOMEM;
+
+       if (count >= PAGE_SIZE) {
+               ret = -EINVAL;
+               goto out;
+       }
+       /* Copy the full write, then NUL-terminate inside the page */
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+
+       ret = kstrtoint(buf, 0, &newval);
+       if (ret != 0) {
+               ret = -EINVAL;
+               goto out;
+       }
+       xsvnic_debug = newval;
+       ret = count;
+out:
+       /* Free the page on every exit path */
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int xsvnic_proc_read_debug(struct seq_file *m, void *data)
+{
+       int i;
+
+       seq_printf(m, "Total Wait time(secs): %ld\n", (xsvnic_wait_time / HZ));
+       seq_printf(m, "Debug bitmask        : 0x%x\n\n", xsvnic_debug);
+       for (i = 0; i < XSVNIC_MAX_GLOB_COUNTERS; i++)
+               seq_printf(m, "%s%d\n", glob_counter_name[i],
+                          xsvnic_counters[i]);
+       return 0;
+}
+
+static int xsvnic_proc_open_debug(struct inode *inode, struct file *file)
+{
+       return single_open(file, xsvnic_proc_read_debug, PDE_DATA(inode));
+}
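+
+/*
+ * Example (illustrative bitmask value): set the debug bitmask, then read
+ * back the global counters:
+ *
+ *     echo 0x4 > /proc/driver/xsvnic/debug
+ *     cat /proc/driver/xsvnic/debug
+ */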
+
+static int xsvnic_proc_read_iscsi_boot(struct seq_file *m, void *data)
+{
+       struct xsvnic *vp;
+
+       mutex_lock(&xsvnic_mutex);
+
+       list_for_each_entry(vp, &xsvnic_list, xsvnic_list) {
+               if (vp->iscsi_boot_info.initiator_iqn[0] == '\0')
+                       continue;
+               seq_printf(m, "iscsiserver=%d.%d.%d.%d:%d\n",
+                          (vp->iscsi_boot_info.target_ip_address >> 24) & 0xff,
+                          (vp->iscsi_boot_info.target_ip_address >> 16) & 0xff,
+                          (vp->iscsi_boot_info.target_ip_address >> 8) & 0xff,
+                          (vp->iscsi_boot_info.target_ip_address >> 0) & 0xff,
+                          vp->iscsi_boot_info.port);
+               seq_printf(m, "iscsiinitiator=%s\n",
+                          vp->iscsi_boot_info.initiator_iqn);
+               seq_printf(m, "iscsitarget=%s:%d\n",
+                          vp->iscsi_boot_info.target_iqn,
+                          vp->iscsi_boot_info.lun);
+
+               if (vp->iscsi_boot_info.ip_addr == 0)
+                       seq_printf(m, "iscsiboot=%s\n",
+                                  vp->iscsi_boot_info.vnic_name);
+               else {
+                       seq_printf(m,
+                                  "iscsiboot=%s:%d.%d.%d.%d:%d.%d.%d.%d:%d.%d.%d.%d:%d.%d.%d.%d\n",
+                                  vp->iscsi_boot_info.vnic_name,
+                                  (vp->iscsi_boot_info.ip_addr >> 24) & 0xff,
+                                  (vp->iscsi_boot_info.ip_addr >> 16) & 0xff,
+                                  (vp->iscsi_boot_info.ip_addr >> 8) & 0xff,
+                                  (vp->iscsi_boot_info.ip_addr >> 0) & 0xff,
+                                  (vp->iscsi_boot_info.netmask >> 24) & 0xff,
+                                  (vp->iscsi_boot_info.netmask >> 16) & 0xff,
+                                  (vp->iscsi_boot_info.netmask >> 8) & 0xff,
+                                  (vp->iscsi_boot_info.netmask >> 0) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   gateway_ip_address >> 24) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   gateway_ip_address >> 16) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   gateway_ip_address >> 8) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   gateway_ip_address >> 0) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   dns_ip_address >> 24) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   dns_ip_address >> 16) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   dns_ip_address >> 8) & 0xff,
+                                  (vp->iscsi_boot_info.
+                                   dns_ip_address >> 0) & 0xff);
+               }
+
+               if (vp->iscsi_boot_info.mount_type == SAN_MOUNT_TYPE_LVM) {
+                       if (vp->iscsi_boot_info.vol_group[0] != '\0')
+                               seq_printf(m, "sanmount=lvm:%s:%s\n",
+                                          vp->iscsi_boot_info.vol_group,
+                                          vp->iscsi_boot_info.vol_group_name);
+               } else if (vp->iscsi_boot_info.mount_type ==
+                          SAN_MOUNT_TYPE_DIRECT) {
+                       /* direct mount device */
+                       if (vp->iscsi_boot_info.mount_dev[0] != '\0')
+                               seq_printf(m, "sanmount=%s\n",
+                                          vp->iscsi_boot_info.mount_dev);
+               }
+               seq_printf(m, "iscsitpg=%s\n",
+                          vp->iscsi_boot_info.target_portal_group);
+       }
+
+       mutex_unlock(&xsvnic_mutex);
+
+       return 0;
+}
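+
+/*
+ * Example boot-info output (all values illustrative only):
+ *
+ *     iscsiserver=192.168.1.10:3260
+ *     iscsiinitiator=iqn.1988-12.com.example:host1
+ *     iscsitarget=iqn.1988-12.com.example:tgt1:0
+ *     iscsiboot=vnic1:192.168.1.20:255.255.255.0:192.168.1.1:192.168.1.2
+ *     sanmount=lvm:vg_root:lv_root
+ *     iscsitpg=tpg1
+ */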
+
+static ssize_t xsvnic_proc_write_iscsi_boot(struct file *file,
+                                           const char __user *buffer,
+                                           size_t count, loff_t *offp)
+{
+       /* Not implemented (dummy write) */
+       return count;
+}
+
+static int xsvnic_proc_open_iscsi_boot(struct inode *inode, struct file *file)
+{
+       return single_open(file, xsvnic_proc_read_iscsi_boot, PDE_DATA(inode));
+}
+
+int xsvnic_create_procfs_root_entries(void)
+{
+       struct proc_dir_entry *debug_file;
+       int ret = 0;
+
+       proc_root_xsvnic = proc_mkdir("driver/xsvnic", NULL);
+       if (!proc_root_xsvnic) {
+               pr_info("Unable to create /proc/driver/xsvnic\n");
+               return -ENOMEM;
+       }
+       proc_root_xsvnic_dev = proc_mkdir("devices", proc_root_xsvnic);
+       if (!proc_root_xsvnic_dev) {
+               pr_info("Unable to create /proc/driver/xsvnic/devices\n");
+               ret = -ENOMEM;
+               goto create_proc_end_1;
+       }
+       debug_file = proc_create_data("debug", S_IFREG, proc_root_xsvnic,
+                                     &xsvnic_debug_proc_fops, NULL);
+       if (!debug_file) {
+               pr_info("Unable to create /proc/driver/xsvnic/debug\n");
+               ret = -ENOMEM;
+               goto create_proc_end_2;
+       }
+
+       iscsi_boot = proc_create_data("boot-info", S_IFREG, proc_root_xsvnic,
+                                     &xsvnic_iscsi_boot_proc_fops, NULL);
+       if (!iscsi_boot) {
+               pr_info("Unable to create /proc/driver/xsvnic/boot-info\n");
+               ret = -ENOMEM;
+               goto create_proc_end_3;
+       }
+
+       return 0;
+
+create_proc_end_3:
+       remove_proc_entry("debug", proc_root_xsvnic);
+create_proc_end_2:
+       remove_proc_entry("devices", proc_root_xsvnic_dev);
+create_proc_end_1:
+       remove_proc_entry("driver/xsvnic", NULL);
+       return ret;
+}
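+
+/*
+ * Resulting /proc layout (per the create calls above):
+ *
+ *     /proc/driver/xsvnic/debug                     debug bitmask + counters
+ *     /proc/driver/xsvnic/boot-info                 iSCSI boot parameters
+ *     /proc/driver/xsvnic/devices/<vnic>/<vnic>     per-device state
+ *     /proc/driver/xsvnic/devices/<vnic>/counters   per-device counters
+ */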
+
+void xsvnic_remove_procfs_root_entries(void)
+{
+       remove_proc_entry("debug", proc_root_xsvnic);
+       remove_proc_entry("devices", proc_root_xsvnic);
+       remove_proc_entry("boot-info", proc_root_xsvnic);
+       remove_proc_entry("driver/xsvnic", NULL);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_xsmp_msgs.h b/drivers/infiniband/ulp/xsigo/xsvnic/xsvnic_xsmp_msgs.h
new file mode 100644 (file)
index 0000000..159b091
--- /dev/null
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSVNIC_XSMP_MSGS_H__
+#define __XSVNIC_XSMP_MSGS_H__
+
+#define XSVNIC_MAX_NAME_SIZE           16
+#define CHASSIS_MAX_NAME_SIZE           32
+#define SESSION_MAX_NAME_SIZE           32
+#define XSVNIC_MAX_HOST_NAME           32
+#define MP_GROUP_NAME_MAX              (XSVNIC_MAX_NAME_SIZE + \
+                                       XSVNIC_MAX_HOST_NAME)
+#define XSVNIC_VNIC_NAMELENTH          15
+
+enum xsvnic_xsmp_cmd_type {
+       XSMP_XSVNIC_INVALID,
+       XSMP_XSVNIC_INSTALL,
+       XSMP_XSVNIC_DELETE,
+       XSMP_XSVNIC_UPDATE,
+       XSMP_XSVNIC_ADMIN_UP,
+       XSMP_XSVNIC_ADMIN_DOWN,
+       XSMP_XSVNIC_OPER_UP,
+       XSMP_XSVNIC_OPER_DOWN,
+       XSMP_XSVNIC_OPER_READY,
+       XSMP_XSVNIC_VLANIP,     /* VLAN and IP address */
+       XSMP_XSVNIC_STATS,      /* XSVNIC driver statistics */
+       XSMP_XSVNIC_SYNC_BEGIN,
+       XSMP_XSVNIC_SYNC_END,
+       XSMP_XSVNIC_INFO_REQUEST,       /* request vnic info  */
+       XSMP_XSVNIC_OPER_FAILED,
+       XSMP_XSVNIC_OPER_REQ,
+       XSMP_XSVNIC_HA_INFO,
+       XSMP_XSVNIC_ISCSI_INFO,
+
+       XSMP_XSVNIC_TYPE_MAX,
+};
+
+/* XSVNIC specific messages */
+
+struct xsvnic_xsmp_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bitmask;
+                       u64 resource_id;
+                       u64 tca_guid;
+                       u16 tca_lid;
+                       u16 mac_high;
+                       u32 mac_low;
+                       u16 vn_admin_rate;
+                       u16 admin_state;
+                       u16 encap;
+                       u16 vn_mtu;
+                       u32 install_flag;
+                       u8 vnic_name[XSVNIC_MAX_NAME_SIZE];
+                       u16 service_level;      /* SL value for this vnic */
+                       /* 1: enable, 0: disable host rate control */
+                       u16 fc_active;
+                       u16 cir;        /* committed rate in mbps */
+                       u16 pir;        /* peak rate in mbps */
+                       u32 cbs;        /* committed burst size in bytes */
+                       u32 pbs;        /* peak burst size in bytes */
+                       /* the index used by vmware for persistence */
+                       u8 vm_index;
+                       u8 _reserved;
+                       u16 mp_flag;
+                       u8 mp_group[MP_GROUP_NAME_MAX];
+               } __packed;
+               u8 bytes[512];
+       };
+} __packed;
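+
+/*
+ * Sketch (an assumption, not code from this driver): the union above pins
+ * the wire size of every install/update message at 512 bytes regardless
+ * of how many named fields are in use; a compile-time assertion such as
+ *
+ *     BUILD_BUG_ON(sizeof(struct xsvnic_xsmp_msg) != 512);
+ *
+ * in module init would catch accidental layout drift.
+ */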
+
+/* The reason code for NACKing an install  */
+/* vnic name exceeding 15 chars */
+#define XSVNIC_NACK_INVALID            0
+/* duplicate name */
+#define XSVNIC_NACK_DUP_NAME           1
+/* duplicate VID */
+#define XSVNIC_NACK_DUP_VID            2
+/* Max number of XSVNICs reached */
+#define XSVNIC_NACK_LIMIT_REACHED      3
+/* Error during instantiation */
+#define XSVNIC_NACK_ALLOCATION_ERROR   4
+#define XSVNIC_NACK_CODE_MAX           5
+
+/* The common XSVNIC XSMP header for all messages */
+struct xsvnic_xsmp_header {
+       u8 type;
+       u8 code;
+       u16 length;
+       u32 bitmask;
+       u64 resource_id;
+};
+
+/* Maximum number of dwords in an IP address (v4 or v6) */
+#define MAX_IP_ADDR_DWORDS     4
+
+/* IP address type */
+enum xsvnic_ipaddr_type {
+       ADDR_TYPE_IPV4 = 1,
+       ADDR_TYPE_IPV6,
+};
+
+/* Bitmask values for add/delete VLAN notifications */
+#define XSVNIC_ADD_VLAN_NOTIFY         (1 << 0)
+#define XSVNIC_DELETE_VLAN_NOTIFY      (1 << 1)
+
+/* Denotes an instance of a VLANID and IP address pair */
+struct xsvnic_xsmp_vlanip_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bitmask;
+                       u64 resource_id;
+                       u8 ip_type;
+                       u8 _reserved1;
+                       u16 _reserved2;
+                       u32 vlanid;
+                       u32 ipaddress[MAX_IP_ADDR_DWORDS];
+                       u32 netmask[MAX_IP_ADDR_DWORDS];
+                       /*
+                        * This does not come from chassis but locally generated
+                        */
+                       char ifname[XSVNIC_MAX_NAME_SIZE];
+                       u16 mp_flag;
+               } __packed;
+               u8 bytes[512];
+       };
+};
+
+struct xsvnic_ha_info_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 reserved;
+                       u64 resource_id;        /* vid */
+                       u8 ha_state;
+                       u8 name[XSVNIC_MAX_NAME_SIZE];
+               } __packed;
+               u8 bytes[512];
+       };
+} __packed;
+
+#define ISCSI_MOUNT_DEV_NAME_LEN    100
+#define MAX_DOMAIN_NAME_LEN 64
+
+#define SAN_MOUNT_TYPE_STATIC 1
+#define SAN_MOUNT_TYPE_LVM    2
+#define SAN_MOUNT_TYPE_DIRECT 3
+
+struct xsvnic_iscsi_info {
+       uint64_t vid;
+       uint8_t role;
+       uint16_t vlan_id;
+       uint8_t ip_type;
+       uint32_t ip_addr;
+       uint32_t netmask;
+       uint64_t mac;
+       char vnic_name[XSVNIC_MAX_NAME_SIZE];
+       uint32_t gateway_ip_address;
+       uint32_t dns_ip_address;
+       char domain_name[MAX_DOMAIN_NAME_LEN];
+       uint16_t protocol;
+       uint16_t port;
+       uint16_t lun;
+       uint32_t target_ip_address;
+       char target_iqn[ISCSI_MOUNT_DEV_NAME_LEN];      /* Target Name */
+       char target_portal_group[ISCSI_MOUNT_DEV_NAME_LEN];
+       char initiator_iqn[ISCSI_MOUNT_DEV_NAME_LEN];
+
+       uint16_t mount_type;
+       char mount_dev[ISCSI_MOUNT_DEV_NAME_LEN];
+       char mount_options[ISCSI_MOUNT_DEV_NAME_LEN];
+       char vol_group[ISCSI_MOUNT_DEV_NAME_LEN];
+       char vol_group_name[ISCSI_MOUNT_DEV_NAME_LEN];
+} __packed;
+
+struct xsvnic_iscsi_msg {
+       union {
+               struct {
+                       uint8_t type;
+                       uint8_t code;
+                       uint16_t length;
+                       struct xsvnic_iscsi_info iscsi_info;
+               } __packed;
+               uint8_t bytes[960];
+       };
+} __packed;
+
+/* Values for the bitmask of the install/delete/update message*/
+#define XSVNIC_UPDATE_MAC              (1 << 0)
+#define XSVNIC_UPDATE_BANDWIDTH                (1 << 1)
+#define XSVNIC_UPDATE_MTU              (1 << 2)
+#define XSVNIC_UPDATE_TCA_INFO         (1 << 3)
+#define XSVNIC_UPDATE_SL               (1 << 4)
+#define XSVNIC_UPDATE_ENCAP            (1 << 5)
+#define XSVNIC_UPDATE_ADMIN_STATE      (1 << 6)
+#define XSVNIC_UPDATE_QOS              (1 << 7)
+#define XSVNIC_UPDATE_ACL              (1 << 8)
+#define XSVNIC_UPDATE_MP_FLAG          (1 << 10)
+#define XSVNIC_XT_STATE_DOWN           (1 << 30)
+#define XSVNIC_UPDATE_XT_CHANGE                (1 << 31)
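+
+/*
+ * Illustrative decode (assumed handler behavior): an update message that
+ * changes only the MTU and the admin state would carry
+ *
+ *     bitmask = XSVNIC_UPDATE_MTU | XSVNIC_UPDATE_ADMIN_STATE;
+ *
+ * and the receiver applies only the fields whose bits are set.
+ */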
+
+/* mp_flag */
+#define MP_XSVNIC_PRIMARY         (1 << 0)
+#define MP_XSVNIC_SECONDARY       (1 << 1)
+#define MP_XSVNIC_AUTO_SWITCH     (1 << 2)
+
+/* ha_state */
+#define XSVNIC_HA_STATE_UNKNOWN        0
+#define XSVNIC_HA_STATE_ACTIVE 1
+#define XSVNIC_HA_STATE_STANDBY        2
+
+/* Ack and Nack sent out in the 'code' field */
+#define        XSMP_XSVNIC_ACK         (1 << 6)
+#define        XSMP_XSVNIC_NACK                (1 << 7)
+
+/* Bits for the promiscuous flag field */
+#define XSVNIC_MCAST           (1 << 0)
+
+/* Defines for the install flag */
+#define XSVNIC_INSTALL_TCP_OFFL        (1 << 0)
+#define XSVNIC_INSTALL_UDP_OFFL        (1 << 1)
+#define XSVNIC_INSTALL_TSO     (1 << 3)
+#define XSVNIC_INSTALL_RX_BAT  (1 << 4)
+#define XSVNIC_8K_IBMTU                (1 << 5)
+#define        XSVNIC_INSTALL_LINK2QP  (1 << 8)
+
+#define XSIGO_IP_FRAGMENT_BIT       (1 << 8)
+#define XSIGO_IPV4_BIT              (1 << 6)
+#define XSIGO_TCP_CHKSUM_GOOD_BIT   (1 << 3)
+#define XSIGO_UDP_CHKSUM_GOOD_BIT   (1 << 1)
+
+#endif /* __XSVNIC_XSMP_MSGS_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xve/Kconfig b/drivers/infiniband/ulp/xsigo/xve/Kconfig
new file mode 100644 (file)
index 0000000..3b7024a
--- /dev/null
@@ -0,0 +1,44 @@
+config INFINIBAND_XVE
+       tristate "Xsigo Virtual Ethernet"
+       depends on INFINIBAND_XSCORE && NETDEVICES && INET && (IPV6 || IPV6=n)
+       select INET_LRO
+       ---help---
+         Support for the Xsigo Virtual Ethernet (XVE) protocol,
+         allowing private connectivity between servers over
+         fabric-switched traffic.
+
+config INFINIBAND_XVE_CM
+       bool "Xsigo Virtual Ethernet Connected Mode support"
+       depends on INFINIBAND_XVE
+       default n
+       ---help---
+         This option enables support for XVE connected mode.
+
+         WARNING: Enabling connected mode will trigger some packet
+         drops for multicast and UD mode traffic from this interface,
+         unless you limit mtu for these destinations to 2030.
+
+config INFINIBAND_XVE_DEBUG
+       bool "Xsigo Virtual Ethernet debugging" if EMBEDDED
+       depends on INFINIBAND_XVE
+       default n
+       ---help---
+         This option causes debugging code to be compiled into the
+         XVE driver.  The output can be turned on via the
+         debug_level and mcast_debug_level module parameters (which
+         can also be set after the driver is loaded through sysfs).
+
+         This option also creates a directory tree under xve/ in
+         debugfs, which contains files that expose debugging
+         information about IB multicast groups used by the XVE
+         driver.
+
+config INFINIBAND_XVE_DEBUG_DATA
+       bool "Xsigo Virtual Ethernet data path debugging"
+       depends on INFINIBAND_XVE_DEBUG
+       ---help---
+         This option compiles debugging code into the data path
+         of the XVE driver.  The output can be turned on via the
+         data_debug_level module parameter; however, even with output
+         turned off, this debugging code will have some performance
+         impact.
diff --git a/drivers/infiniband/ulp/xsigo/xve/Makefile b/drivers/infiniband/ulp/xsigo/xve/Makefile
new file mode 100644 (file)
index 0000000..13f4dec
--- /dev/null
@@ -0,0 +1,10 @@
+obj-$(CONFIG_INFINIBAND_XVE) := xve.o
+xve-y := xve_main.o xve_verbs.o xve_multicast.o xve_ib.o xve_tables.o \
+        xve_ethtool.o xve_cm.o xve_stats.o
+
+ccflags-y += -DXSIGO_LOCAL_VERSION=\"6.0.r8004\"
+ccflags-y += -DRDMA_PORT_LINK_LAYER_CHANGES -DHAS_SKB_ACCESS_FUNCTIONS
+ccflags-y += -DSCSI_STRUCT_CHANGES -DSCSI_TIMEOUT_CHANGES -DLLE
+ccflags-y += -DXG_FRAG_SIZE_PRESENT -DXG_FRAG_PAGE_PRESENT
+ccflags-y += -Idrivers/infiniband/ulp/xsigo/xscore
+ccflags-y += -Idrivers/infiniband/include
diff --git a/drivers/infiniband/ulp/xsigo/xve/hash.h b/drivers/infiniband/ulp/xsigo/xve/hash.h
new file mode 100644 (file)
index 0000000..363c70c
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 Nicira Networks.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef HASH_H
+#define HASH_H 1
+
+/* This is the public domain lookup3 hash by Bob Jenkins from
+ * http://burtleburtle.net/bob/c/lookup3.c, modified for style. */
+
+#define HASH_ROT(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
+
+#define HASH_MIX(a, b, c)                       \
+       do {                                    \
+               a -= c; a ^= HASH_ROT(c, 4); c += b;    \
+               b -= a; b ^= HASH_ROT(a, 6); a += c;    \
+               c -= b; c ^= HASH_ROT(b, 8); b += a;    \
+               a -= c; a ^= HASH_ROT(c, 16); c += b;   \
+               b -= a; b ^= HASH_ROT(a, 19); a += c;   \
+               c -= b; c ^= HASH_ROT(b, 4); b += a;    \
+       } while (0)
+
+#define HASH_FINAL(a, b, c)                    \
+       do {                                    \
+               c ^= b; c -= HASH_ROT(b, 14);           \
+               a ^= c; a -= HASH_ROT(c, 11);           \
+               b ^= a; b -= HASH_ROT(a, 25);           \
+               c ^= b; c -= HASH_ROT(b, 16);           \
+               a ^= c; a -= HASH_ROT(c,  4);           \
+               b ^= a; b -= HASH_ROT(a, 14);           \
+               c ^= b; c -= HASH_ROT(b, 24);           \
+       } while (0)
+
+static inline uint32_t hash_bytes(const void *p_, size_t n, uint32_t basis)
+{
+       const uint8_t *p = p_;
+       uint32_t a, b, c;
+       uint32_t tmp[3];
+
+       a = b = c = 0xdeadbeef + n + basis;
+
+       while (n >= sizeof(tmp)) {
+               memcpy(tmp, p, sizeof(tmp));
+               a += tmp[0];
+               b += tmp[1];
+               c += tmp[2];
+               HASH_MIX(a, b, c);
+               n -= sizeof(tmp);
+               p += sizeof(tmp);
+       }
+
+       if (n) {
+               tmp[0] = tmp[1] = tmp[2] = 0;
+               memcpy(tmp, p, n);
+               a += tmp[0];
+               b += tmp[1];
+               c += tmp[2];
+               HASH_FINAL(a, b, c);
+       }
+
+       return c;
+}
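+
+/*
+ * Usage sketch (assumed caller, not part of this header): hash a MAC
+ * address into a power-of-two bucket table, where nbuckets is a
+ * hypothetical table size:
+ *
+ *     u32 bucket = hash_bytes(mac, ETH_ALEN, 0) & (nbuckets - 1);
+ */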
+
+#endif /* hash.h */
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve.h b/drivers/infiniband/ulp/xsigo/xve/xve.h
new file mode 100644 (file)
index 0000000..5bd33b2
--- /dev/null
@@ -0,0 +1,1537 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _XVE_H
+#define _XVE_H
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/icmpv6.h>
+#include <linux/vmalloc.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/workqueue.h>
+#include <linux/kref.h>
+#include <linux/if_vlan.h>
+#include <linux/if_infiniband.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/if_arp.h>
+#include <linux/inet_lro.h>
+#include <linux/kernel.h>
+#include <linux/ethtool.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/err.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/mii.h>
+
+#include <net/neighbour.h>
+#include <net/dst.h>
+
+#include <linux/atomic.h>
+#include <asm/unaligned.h>
+
+#include <rdma/ib_cm.h>
+#include <rdma/ib_cache.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_sa.h>
+
+#include "xscore.h"
+#include "hash.h"
+#include "xsmp_common.h"
+#include "xsmp_session.h"
+#include "xve_xsmp_msgs.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define XVE_DRIVER_VERSION "0.31"
+#else
+#define XVE_DRIVER_VERSION "0.31" XSIGO_LOCAL_VERSION
+#endif
+
+#ifndef NETIF_F_LRO
+#define NETIF_F_LRO        NETIF_F_SW_LRO
+#endif
+
+#ifndef bool
+#define bool int
+#define true 1
+#define false 0
+#endif
+
+/* macros for ipv6 support */
+
+/* 86 bytes */
+#define XVE_IPV6_MIN_PACK_LEN   86
+/* as per the protocol */
+#define IPV6_HDR_LEN            40
+/* 128 bits IP address length for ipv6 */
+#define IPV6_ADDR_LEN           16
+/* next header (icmp-ndp) in ipv6 header */
+#define NEXTHDR_ICMP            58
+/* Neighbor solicitation packet type */
+#define ICMP_NDP_TYPE           135
+/* payload length in ipv6 (icmp header + optional header 24 + 8 ) */
+#define PAYLOAD_LEN             32
+/* as per the protocol */
+#define ICMP_CODE               0
+/* length of ICMP-NDP header */
+#define ICMP_NDP_HDR_LEN        24
+/* source link layer address type */
+#define ICMP_OPTION_TYPE        1
+/* 8 bytes length of ICMP option header */
+#define ICMP_OPTION_LEN         1
+/* prefix for destination multicast address */
+#define PREFIX_MULTI_ADDR      0x33
+/* ethernet header length */
+#define ETH_HDR_LEN            14
+
+/* constants */
+enum xve_flush_level {
+       XVE_FLUSH_LIGHT,
+       XVE_FLUSH_NORMAL,
+       XVE_FLUSH_HEAVY
+};
+
+enum {
+       XVE_UD_HEAD_SIZE = IB_GRH_BYTES + VLAN_ETH_HLEN,
+       XVE_UD_RX_SG = 2,       /* max buffer needed for 4K mtu */
+       XVE_CM_MTU = 0x10000 - 0x20,    /* padding to align header to 16 */
+       XVE_CM_BUF_SIZE = XVE_CM_MTU + VLAN_ETH_HLEN,
+       XVE_CM_HEAD_SIZE = XVE_CM_BUF_SIZE % PAGE_SIZE,
+       XVE_CM_RX_SG = ALIGN(XVE_CM_BUF_SIZE, PAGE_SIZE) / PAGE_SIZE,
+       XVE_RX_RING_SIZE = 256,
+       XVE_TX_RING_SIZE = 128,
+       XVE_MAX_QUEUE_SIZE = 8192,
+       XVE_MIN_QUEUE_SIZE = 2,
+       XVE_CM_MAX_CONN_QP = 4096,
+       XVE_NUM_WC = 4,
+       XVE_MAX_PATH_REC_QUEUE = 3,
+       XVE_MAX_MCAST_QUEUE = 3,
+       XVE_MCAST_FLAG_FOUND = 0,       /* used in set_multicast_list */
+       XVE_MCAST_FLAG_SENDONLY = 1,
+       XVE_MCAST_FLAG_BUSY = 2,        /* joining or already joined */
+       XVE_MCAST_FLAG_ATTACHED = 3,
+       XVE_MAX_LRO_DESCRIPTORS = 8,
+       XVE_LRO_MAX_AGGR = 64,
+       MAX_SEND_CQE = 32,
+       XVE_CM_COPYBREAK = 256,
+};
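+
+/*
+ * Worked sizes for the CM constants above, assuming a 4K PAGE_SIZE:
+ *
+ *     XVE_CM_MTU       = 0x10000 - 0x20             = 65504 bytes
+ *     XVE_CM_BUF_SIZE  = 65504 + VLAN_ETH_HLEN (18) = 65522 bytes
+ *     XVE_CM_HEAD_SIZE = 65522 % 4096               = 4082 bytes
+ *     XVE_CM_RX_SG     = ALIGN(65522, 4096) / 4096  = 16 s/g entries
+ */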
+
+enum {
+       XVE_FLAG_OPER_UP = 0,
+       XVE_FLAG_INITIALIZED = 1,
+       XVE_FLAG_ADMIN_UP = 2,
+       XVE_PKEY_ASSIGNED = 3,
+       XVE_PKEY_STOP = 4,
+       XVE_IB_DEV_OPEN = 5,
+       XVE_MCAST_RUN = 6,
+       XVE_STOP_REAPER = 7,
+       XVE_FLAG_ADMIN_CM = 9,
+       XVE_FLAG_UMCAST = 10,
+       XVE_FLAG_CSUM = 11,
+       XVE_MCAST_RUN_GC = 12,
+       XVE_FLAG_ADVERT_JOIN = 13,
+       XVE_FLAG_IB_EVENT = 14,
+       XVE_FLAG_DONT_DETACH_MCAST = 15,
+       XVE_MAX_BACKOFF_SECONDS = 16,
+};
+
+enum xve_advert_types {
+       XVE_ADVERT_JOIN = 1,
+       XVE_ADVERT_RESP = 2,
+       XVE_ADVERT_UPD = 3,
+};
+
+enum {
+       XVE_SYNC_END_DEL_COUNTER,
+       XVE_VNIC_INSTALL_COUNTER,
+       XVE_VNIC_DEL_COUNTER,
+       XVE_VNIC_DEL_NOVID_COUNTER,
+       XVE_VNIC_UPDATE_COUNTER,
+       XVE_VNIC_SYNC_BEGIN_COUNTER,
+       XVE_VNIC_SYNC_END_COUNTER,
+       XVE_VNIC_OPER_REQ_COUNTER,
+       XVE_VNIC_UNSUP_XSMP_COUNTER,
+       XVE_ISCSI_INFO_COUNTER,
+       XVE_DEVICE_REMOVAL_COUNTER,
+       XVE_VNIC_STATS_COUNTER,
+       XVE_NUM_PAGES_ALLOCED,
+       XVE_MAX_GLOB_COUNTERS
+};
+enum {
+       XVE_DATA_HBEAT_COUNTER,
+       XVE_HBEAT_ERR_COUNTER,
+       XVE_STATE_MACHINE,
+       XVE_STATE_MACHINE_UP,
+       XVE_STATE_MACHINE_DOWN,
+       XVE_NAPI_POLL_COUNTER,
+       XVE_SHORT_PKT_COUNTER,
+       XVE_TX_COUNTER,
+       XVE_TX_SKB_FREE_COUNTER,
+       XVE_TX_VLAN_COUNTER,
+       XVE_TX_ERROR_COUNTER,
+       XVE_TX_WRB_EXHAUST,
+       XVE_TX_DROP_OPER_DOWN_COUNT,
+       XVE_TX_SKB_ALLOC_ERROR_COUNTER,
+       XVE_TX_RING_FULL_COUNTER,
+       XVE_TX_WAKE_UP_COUNTER,
+       XVE_TX_QUEUE_STOP_COUNTER,
+       XVE_RX_SKB_COUNTER,
+       XVE_RX_SKB_ALLOC_COUNTER,
+       XVE_RX_SMALLSKB_ALLOC_COUNTER,
+       XVE_RX_SKB_FREE_COUNTER,
+       XVE_RX_SKB_OFFLOAD_COUNTER,
+       XVE_RX_SKB_OFFLOAD_FRAG_COUNTER,
+       XVE_RX_SKB_OFFLOAD_NONIPV4_COUNTER,
+       XVE_RX_ERROR_COUNTER,
+       XVE_RX_QUOTA_EXCEEDED_COUNTER,
+       XVE_RX_NOBUF_COUNTER,
+       XVE_NAPI_SCHED_COUNTER,
+       XVE_NAPI_NOTSCHED_COUNTER,
+       XVE_NAPI_RESCHEDULE_COUNTER,
+       XVE_OPEN_COUNTER,
+       XVE_STOP_COUNTER,
+       XVE_GETSTATS_COUNTER,
+       XVE_SET_MCAST_COUNTER,
+       XVE_VLAN_RX_ADD_COUNTER,
+       XVE_VLAN_RX_DEL_COUNTER,
+       XVE_IOCTL_COUNTER,
+       XVE_WDOG_TIMEOUT_COUNTER,
+       XVE_OPER_REQ_COUNTER,
+       XVE_ADMIN_UP_COUNTER,
+       XVE_ADMIN_DOWN_COUNTER,
+       XVE_OPER_UP_STATE_COUNTER,
+       XVE_QP_ERROR_COUNTER,
+       XVE_IB_RECOVERY_COUNTER,
+       XVE_IB_RECOVERED_COUNTER,
+       XVE_IBLINK_DOWN_COUNTER,
+       XVE_IBLINK_UP_COUNTER,
+       XVE_IB_PORT_NOT_ACTIVE,
+       XVE_SENT_OPER_UP_COUNTER,
+       XVE_SENT_OPER_DOWN_COUNTER,
+       XVE_SENT_OPER_STATE_FAILURE_COUNTER,
+       XVE_SENT_OPER_STATE_SUCCESS_COUNTER,
+       XVE_DROP_STANDBY_COUNTER,
+
+       XVE_MAC_LEARN_COUNTER,
+       XVE_MAC_AGED_COUNTER,
+       XVE_MAC_AGED_CHECK,
+       XVE_MAC_AGED_NOMATCHES,
+       XVE_MAC_STILL_INUSE,
+       XVE_MAC_MOVED_COUNTER,
+
+       XVE_MCAST_JOIN_TASK,
+       XVE_MCAST_LEAVE_TASK,
+       XVE_MCAST_CARRIER_TASK,
+
+       XVE_TX_UD_COUNTER,
+       XVE_TX_RC_COUNTER,
+       XVE_TX_MCAST_PKT,
+       XVE_TX_MCAST_ARP_QUERY,
+       XVE_TX_MCAST_NDP_QUERY,
+       XVE_TX_MCAST_ARP_VLAN_QUERY,
+       XVE_TX_MCAST_NDP_VLAN_QUERY,
+       XVE_TX_MCAST_FLOOD_UD,
+       XVE_TX_MCAST_FLOOD_RC,
+       XVE_TX_QUEUE_PKT,
+
+       XVE_PATH_NOT_FOUND,
+       XVE_PATH_NOT_SETUP,
+       XVE_AH_NOT_FOUND,
+
+       XVE_PATHREC_QUERY_COUNTER,
+       XVE_PATHREC_RESP_COUNTER,
+       XVE_PATHREC_RESP_ERR_COUNTER,
+
+       XVE_SM_CHANGE_COUNTER,
+       XVE_CLIENT_REREGISTER_COUNTER,
+       XVE_EVENT_PORT_ERR_COUNTER,
+       XVE_EVENT_PORT_ACTIVE_COUNTER,
+       XVE_EVENT_LID_CHANGE_COUNTER,
+       XVE_EVENT_PKEY_CHANGE_COUNTER,
+       XVE_INVALID_EVENT_COUNTER,
+
+       XVE_MAX_COUNTERS
+};
+
+enum {
+       /* Work queue Counters */
+       XVE_WQ_START_PKEYPOLL,
+       XVE_WQ_FINISH_PKEYPOLL,
+       XVE_WQ_START_AHREAP,
+       XVE_WQ_FINISH_AHREAP,
+       XVE_WQ_START_FWT_AGING,
+       XVE_WQ_FINISH_FWT_AGING,
+       XVE_WQ_START_MCASTJOIN,
+       XVE_WQ_FINISH_MCASTJOIN,
+       XVE_WQ_START_MCASTLEAVE,
+       XVE_WQ_FINISH_MCASTLEAVE,
+       XVE_WQ_START_MCASTON,
+       XVE_WQ_FINISH_MCASTON,
+       XVE_WQ_START_MCASTRESTART,
+       XVE_WQ_FINISH_MCASTRESTART,
+       XVE_WQ_START_FLUSHLIGHT,
+       XVE_WQ_FINISH_FLUSHLIGHT,
+       XVE_WQ_START_FLUSHNORMAL,
+       XVE_WQ_FINISH_FLUSHNORMAL,
+       XVE_WQ_START_FLUSHHEAVY,
+       XVE_WQ_FINISH_FLUSHHEAVY,
+       XVE_WQ_START_CMSTALE,
+       XVE_WQ_FINISH_CMSTALE,
+       XVE_WQ_START_CMTXSTART,
+       XVE_WQ_FINISH_CMTXSTART,
+       XVE_WQ_START_CMTXREAP,
+       XVE_WQ_FINISH_CMTXREAP,
+       XVE_WQ_START_CMRXREAP,
+       XVE_WQ_FINISH_CMRXREAP,
+       XVE_WQ_DONT_SCHEDULE,
+       XVE_WQ_INVALID,
+       XVE_WQ_FAILED,
+
+       XVE_MISC_MAX_COUNTERS
+};
+
+/* SPEED CALCULATION: per-lane link speeds, in Mb/s */
+enum {
+       SPEED_SDR = 2500,
+       SPEED_DDR = 5000,
+       SPEED_QDR = 10000,
+       SPEED_FDR10 = 10313,
+       SPEED_FDR = 14063,
+       SPEED_EDR = 25781
+};
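+
+/*
+ * Worked example: xve_calc_speed() below multiplies the per-lane speed
+ * by the port width, so a 4X QDR link reports 10000 Mb/s * 4 lanes =
+ * 40000 Mb/s.
+ */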
+
+/*
+ * Quoting 10.3.1 Queue Pair and EE Context States:
+ *
+ * Note, for QPs that are associated with an SRQ, the Consumer should take the
+ * QP through the Error State before invoking a Destroy QP or a Modify QP to the
+ * Reset State.  The Consumer may invoke the Destroy QP without first performing
+ * a Modify QP to the Error State and waiting for the Affiliated Asynchronous
+ * Last WQE Reached Event. However, if the Consumer does not wait for the
+ * Affiliated Asynchronous Last WQE Reached Event, then WQE and Data Segment
+ * leakage may occur. Therefore, it is good programming practice to tear down a
+ * QP that is associated with an SRQ by using the following process:
+ *
+ * - Put the QP in the Error State
+ * - Wait for the Affiliated Asynchronous Last WQE Reached Event;
+ * - either:
+ *       drain the CQ by invoking the Poll CQ verb and either wait for CQ
+ *       to be empty or the number of Poll CQ operations has exceeded
+ *       CQ capacity size;
+ * - or
+ *       post another WR that completes on the same CQ and wait for this
+ *       WR to return as a WC;
+ * - and then invoke a Destroy QP or Reset QP.
+ *
+ * We use the second option and wait for a completion on the
+ * same CQ before destroying QPs attached to our SRQ.
+ */
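+
+/*
+ * Illustrative sketch (hypothetical, for documentation only): the
+ * teardown sequence quoted above, expressed with standard verbs.  The
+ * function name and the "drain_wr" argument are illustrative; the
+ * driver's implementation of this pattern is xve_cm_start_rx_drain()
+ * in xve_cm.c.
+ */
+static inline void example_srq_qp_teardown(struct ib_qp *qp,
+                                          struct ib_send_wr *drain_wr)
+{
+       struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+       struct ib_send_wr *bad_wr;
+
+       /* 1. Put the QP in the Error state */
+       ib_modify_qp(qp, &attr, IB_QP_STATE);
+       /* 2. Wait for the Last WQE Reached event (QP event handler) */
+       /* 3. Post a WR that completes on the same CQ ... */
+       ib_post_send(qp, drain_wr, &bad_wr);
+       /* 4. ... and destroy the QP once that completion is polled */
+       ib_destroy_qp(qp);
+}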
+
+enum xve_cm_state {
+       XVE_CM_RX_LIVE = 1,
+       XVE_CM_RX_ERROR,        /* Ignored by stale task */
+       XVE_CM_RX_FLUSH         /* Last WQE Reached event observed */
+};
+
+enum {
+       DEBUG_DRV_INFO = 0x00000001,
+       DEBUG_DRV_FUNCTION = 0x00000002,
+       DEBUG_XSMP_INFO = 0x00000004,
+       DEBUG_XSMP_FUNCTION = 0x00000008,
+       DEBUG_IOCTRL_INFO = 0x00000010,
+       DEBUG_IOCTRL_FUNCTION = 0x00000020,
+       DEBUG_TEST_INFO = 0x00000040,
+       DEBUG_DATA_INFO = 0x00000080,
+       DEBUG_MCAST_INFO = 0x00000100,
+       DEBUG_TABLE_INFO = 0x00000200,
+       DEBUG_FLUSH_INFO = 0x00000400,
+       DEBUG_DUMP_PKTS = 0x00000800,
+       DEBUG_SEND_INFO = 0x00001000,
+       DEBUG_CONTINUE_UNLOAD = 0x00002000,
+       DEBUG_MISC_INFO = 0x00004000,
+       DEBUG_IBDEV_INFO = 0x00008000,
+       DEBUG_CM_INFO = 0x00010000
+};
+
+#define        XVE_OP_RECV   (1ul << 31)
+#define XVE_FWT_HASH_LISTS  256
+#define XVE_MACT_HASH_LISTS  32
+#define XVE_ADVERT_PROTO 0x8915
+
+#define        XVE_SYNC_DIRTY          1
+#define        XVE_OS_ADMIN_UP         2
+#define        XVE_CHASSIS_ADMIN_UP            3
+#define        XVE_DELETING                    4
+#define        XVE_SEND_ADMIN_STATE            5
+#define        XVE_PORT_LINK_UP                6
+#define        XVE_OPER_REP_SENT               7
+#define        XVE_START_RESP_RCVD             8
+#define        XVE_OPER_UP                     9
+#define        XVE_STOP_RX_SENT                10
+#define        XVE_XT_DOWN                     11
+#define        XVE_XT_STATE_CHANGE             12
+#define        XVE_SHUTDOWN                    13
+#define        XVE_MCAST_LIST_SENT             14
+#define        XVE_RING_SIZE_CHANGE            15
+#define        XVE_RX_NOBUF                    16
+#define        XVE_INTR_ENABLED                17
+#define        XVE_TRIGGER_NAPI_SCHED          18
+#define        XVE_IBLINK_DOWN                 19
+#define        XVE_MCAST_LIST_PENDING          20
+#define        XVE_MCAST_LIST_TIMEOUT          21
+#define        XVE_CHASSIS_ADMIN_SHADOW_UP     22
+#define        XVE_OVER_QUOTA                  23
+#define        XVE_TSO_CHANGE                  24
+#define        XVE_RXBATCH_CHANGE              25
+#define MODULE_NAME "XVE"
+#define ALIGN_TO_FF(a) ((a) & 0xff)
+#define XVE_FWT_ENTRY_VALID 1
+#define XVE_FWT_ENTRY_REFRESH 2
+#define XVE_UD_MTU(ib_mtu)             (ib_mtu - VLAN_ETH_HLEN)
+#define XVE_UD_BUF_SIZE(ib_mtu)        (ib_mtu + IB_GRH_BYTES + VLAN_ETH_HLEN)
+#define XVE_MIN_PACKET_LEN 60
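+
+/*
+ * Worked example: with a 4K IB MTU, XVE_UD_BUF_SIZE(4096) =
+ * 4096 + 40 (IB_GRH_BYTES) + 18 (VLAN_ETH_HLEN) = 4154 bytes, which
+ * exceeds a 4K PAGE_SIZE -- hence XVE_UD_RX_SG = 2 above and the
+ * xve_ud_need_sg() test below.
+ */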
+
+/* Extern declarations */
+extern int xve_debug_level;
+extern int xve_cm_single_qp;
+extern u32 xve_hash_salt;
+extern int xve_sendq_size;
+extern int xve_recvq_size;
+extern struct ib_sa_client xve_sa_client;
+extern u32 xve_counters[];
+extern struct workqueue_struct *xve_taskqueue;
+extern struct workqueue_struct *xve_workqueue;
+extern int xve_mc_sendonly_timeout;
+
+extern void xve_remove_procfs_root_entries(void);
+extern int xve_create_procfs_root_entries(void);
+
+
+extern struct mutex xve_mutex;
+extern struct list_head xve_dev_list;
+
+/* structs */
+/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
+struct xve_mcast {
+       struct ib_sa_mcmember_rec mcmember;
+       struct ib_sa_multicast *mc;
+       struct xve_ah *ah;
+
+       struct rb_node rb_node;
+       struct list_head list;
+
+       unsigned long created;
+       unsigned long used;
+       unsigned long backoff;
+       unsigned long flags;
+       unsigned char logcount;
+       struct sk_buff_head pkt_queue;
+       struct net_device *netdev;
+};
+
+struct xve_rx_buf {
+       struct sk_buff *skb;
+       u64 mapping[XVE_UD_RX_SG];
+};
+
+struct xve_tx_buf {
+       struct sk_buff *skb;
+       u64 mapping[MAX_SKB_FRAGS + 1];
+};
+
+struct xve_cm_buf {
+       struct sk_buff *skb;
+       u64 mapping[XVE_CM_RX_SG];
+};
+
+struct ib_cm_id;
+
+struct xve_cm_data {
+       __be32 qpn;             /* High byte MUST be ignored on receive */
+       __be32 mtu;
+};
+
+/* CM connection establishment direction */
+enum {
+       XVE_CM_ESTD_RX = 1,
+       XVE_CM_ESTD_TX
+};
+
+/* CM Statistics */
+struct xve_cm_stats {
+       unsigned long tx_jiffies;
+       unsigned long rx_jiffies;
+       unsigned long total_rx_bytes;
+       unsigned long total_tx_bytes;
+       u32 tx_rate;
+       u32 rx_rate;
+       u32 tx_bytes;
+       u32 rx_bytes;
+
+};
+
+/* Single QP structure */
+struct xve_cm_ctx {
+       char version[64];
+       struct xve_path *path;
+       struct ib_cm_id *id;
+       struct ib_qp *qp;
+       struct list_head list;
+       struct net_device *netdev;
+       struct xve_cm_buf *tx_ring;
+       struct xve_cm_buf *rx_ring;
+       struct xve_cm_stats stats;
+       union ib_gid dgid;
+       enum xve_cm_state state;
+       unsigned long flags;
+       unsigned long jiffies;
+       u32 mtu;
+       int recv_count;
+       unsigned tx_head;
+       unsigned tx_tail;
+       u8 direction;
+};
+
+struct xve_cm_dev_priv {
+       struct ib_srq *srq;
+       struct xve_cm_buf *srq_ring;
+       struct ib_cm_id *id;
+       struct list_head passive_ids;   /* state: LIVE */
+       struct list_head rx_error_list; /* state: ERROR */
+       struct list_head rx_flush_list; /* state: FLUSH, drain not started */
+       struct list_head rx_drain_list; /* state: FLUSH, drain started */
+       struct list_head rx_reap_list;  /* state: FLUSH, drain done */
+       struct list_head start_list;
+       struct list_head reap_list;
+       struct ib_wc ibwc[XVE_NUM_WC];
+       struct ib_sge rx_sge[XVE_CM_RX_SG];
+       struct ib_recv_wr rx_wr;
+       int nonsrq_conn_qp;
+       int max_cm_mtu;
+       int num_frags;
+};
+
+struct xve_ethtool_st {
+       u16 coalesce_usecs;
+       u16 max_coalesced_frames;
+};
+
+struct xve_lro {
+       struct net_lro_mgr lro_mgr;
+       struct net_lro_desc lro_desc[XVE_MAX_LRO_DESCRIPTORS];
+};
+
+struct xve_fwt_entry {
+       struct list_head list;
+       struct hlist_node hlist;
+       struct xve_path *path;
+       union ib_gid dgid;
+       char smac_addr[ETH_ALEN];
+       unsigned long state;
+       atomic_t ref_cnt;
+       unsigned long last_refresh;
+       int hash_value;
+       u32 dqpn;
+       u16 vlan;
+};
+
+struct xve_fwt_s {
+       struct hlist_head fwt[XVE_FWT_HASH_LISTS];
+       spinlock_t lock;
+       unsigned num;
+};
+
+/*
+ * Device private locking: the network stack tx_lock protects members used
+ * in the TX fast path; "lock" protects everything else.  lock nests inside
+ * tx_lock (i.e. tx_lock must be acquired first when both are needed); see
+ * the illustrative sketch after this structure.
+ */
+struct xve_dev_priv {
+       struct list_head list;
+       spinlock_t lock;
+       struct mutex mutex;
+       atomic_t ref_cnt;
+
+       struct ib_device *ca;
+       struct ib_pd *pd;
+       struct ib_mr *mr;
+       struct ib_qp *qp;
+       union ib_gid local_gid;
+       union ib_gid bcast_mgid;
+       u16 local_lid;
+       u32 qkey;
+
+       /* Netdev related attributes */
+       struct net_device *netdev;
+       struct net_device_stats stats;
+       struct napi_struct napi;
+       struct xve_ethtool_st ethtool;
+       u8 lro_mode;
+       struct xve_lro lro;
+       unsigned long flags;
+       unsigned long state;
+
+       struct rb_root path_tree;
+       struct list_head path_list;
+       struct xve_mcast *broadcast;
+       struct list_head multicast_list;
+       struct rb_root multicast_tree;
+
+       struct delayed_work sm_work;
+       struct delayed_work stale_task;
+       struct delayed_work mcast_leave_task;
+       struct delayed_work mcast_join_task;
+       int sm_delay;
+       unsigned int send_hbeat_flag;
+       unsigned long jiffies;
+       struct xve_fwt_s xve_fwt;
+       int aging_delay;
+
+       struct xve_cm_dev_priv cm;
+       unsigned int cm_supported;
+
+       struct ib_port_attr port_attr;
+       u8 port;
+       u16 pkey;
+       u16 pkey_index;
+       int port_speed;
+       int hca_caps;
+       unsigned int admin_mtu;
+       unsigned int mcast_mtu;
+       unsigned int max_ib_mtu;
+       char mode[64];
+
+       /* TX and RX Ring attributes */
+       struct xve_rx_buf *rx_ring;
+       struct xve_tx_buf *tx_ring;
+       unsigned tx_head;
+       unsigned tx_tail;
+       unsigned tx_outstanding;
+       struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
+       struct ib_send_wr tx_wr;
+       struct ib_wc send_wc[MAX_SEND_CQE];
+       struct ib_recv_wr rx_wr;
+       struct ib_sge rx_sge[XVE_UD_RX_SG];
+       struct ib_wc ibwc[XVE_NUM_WC];
+       struct ib_cq *recv_cq;
+       struct ib_cq *send_cq;
+       struct list_head dead_ahs;
+       struct ib_event_handler event_handler;
+
+       /* XSMP related attributes */
+       xsmp_cookie_t xsmp_hndl;
+       struct xsmp_session_info xsmp_info;
+       u64 resource_id;
+       u64 mac;
+       u32 net_id;
+       u16 mp_flag;
+       char vnet_mode;
+       char xve_name[XVE_MAX_NAME_SIZE];
+
+       /* Proc related attributes */
+       struct proc_dir_entry *nic_dir;
+       unsigned long work_queue_failed;
+       char proc_name[XVE_MAX_PROC_NAME_SIZE];
+       u32 counters[XVE_MAX_COUNTERS];
+       u32 misc_counters[XVE_MISC_MAX_COUNTERS];
+       int sindex;
+       int jindex;
+       u16 counters_cleared;
+       u8 next_page;
+       int ix;
+};
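+
+/*
+ * Illustrative sketch (hypothetical, for documentation only) of the lock
+ * ordering documented above: the TX lock is the outer lock and
+ * priv->lock the inner one.
+ */
+static inline void example_lock_order(struct xve_dev_priv *priv)
+{
+       unsigned long flags;
+
+       netif_tx_lock_bh(priv->netdev);         /* outer: TX fast path */
+       spin_lock_irqsave(&priv->lock, flags);  /* inner: everything else */
+       /* ... touch state guarded by both locks ... */
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(priv->netdev);
+}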
+
+struct xve_ah {
+       struct net_device *dev;
+       struct ib_ah *ah;
+       struct list_head list;
+       struct kref ref;
+       unsigned last_send;
+};
+
+struct ib_packed_grh {
+       u8 ip_version;
+       u8 traffic_class;
+       u16 flow_label;
+       u16 payload_length;
+       u8 next_header;
+       u8 hop_limit;
+       union ib_gid source_gid;
+       union ib_gid destination_gid;
+};
+
+struct xve_path {
+       struct net_device *dev;
+       struct xve_cm_ctx *cm_ctx_common;
+       struct xve_cm_ctx *cm_ctx_tx;
+       struct xve_cm_ctx *cm_ctx_rx;
+       struct ib_sa_path_rec pathrec;
+       struct xve_ah *ah;
+       int query_id;
+       struct ib_sa_query *query;
+       struct completion done;
+       struct list_head fwt_list;
+       struct rb_node rb_node;
+       struct list_head list;
+       int valid;
+       struct sk_buff_head queue;
+};
+
+struct xve_work {
+       struct work_struct work;
+       struct delayed_work dwork;
+       xsmp_cookie_t xsmp_hndl;
+       struct xve_dev_priv *priv;
+       int len;
+       int status;
+       u8 *msg;
+};
+
+struct icmp6_ndp {
+       unsigned char icmp6_type;
+       unsigned char icmp6_code;
+       unsigned short int icmp6_cksum;
+       unsigned int icmp6_reserved;
+       unsigned char icmp6_daddr[16];
+       unsigned char icmp6_option_type;
+       unsigned char icmp6_option_len;
+       unsigned char icmp6_option_saddr[6];
+};
+
+#define INC_TX_DROP_STATS(priv, dev)            \
+       do {                                    \
+               ++dev->stats.tx_dropped;        \
+               ++priv->stats.tx_dropped;       \
+       } while (0)
+#define INC_TX_ERROR_STATS(priv, dev)          \
+       do {                                    \
+               ++priv->stats.tx_errors;        \
+               ++dev->stats.tx_errors;         \
+       } while (0)
+#define INC_TX_PKT_STATS(priv, dev)             \
+       do {                                    \
+               ++priv->stats.tx_packets;       \
+               ++dev->stats.tx_packets;        \
+       } while (0)
+#define INC_TX_BYTE_STATS(priv, dev, len)      \
+       do {                                    \
+               priv->stats.tx_bytes += len;    \
+               dev->stats.tx_bytes += len;     \
+       } while (0)
+#define INC_RX_DROP_STATS(priv, dev)            \
+       do {                                    \
+               ++dev->stats.rx_dropped;        \
+               ++priv->stats.rx_dropped;       \
+       } while (0)
+#define INC_RX_ERROR_STATS(priv, dev)           \
+       do {                                    \
+               ++priv->stats.rx_errors;        \
+               ++dev->stats.rx_errors;         \
+       } while (0)
+#define INC_RX_PKT_STATS(priv, dev)             \
+       do {                                    \
+               ++priv->stats.rx_packets;       \
+               ++dev->stats.rx_packets;        \
+       } while (0)
+
+#define INC_RX_BYTE_STATS(priv, dev, len)                      \
+       do {                                                    \
+               priv->stats.rx_bytes += len;                    \
+               dev->stats.rx_bytes += len;                     \
+       } while (0)
+
+#define SET_FLUSH_BIT(priv, bit)                               \
+       do {                                                    \
+               unsigned long flags;                            \
+               spin_lock_irqsave(&priv->lock, flags);          \
+               set_bit(bit, &priv->state);                     \
+               spin_unlock_irqrestore(&priv->lock, flags);     \
+       } while (0)
+
+#define PRINT(level, x, fmt, arg...)                           \
+       printk(level "%s: " fmt, MODULE_NAME, ##arg)
+#define XSMP_ERROR(fmt, arg...)                                        \
+       PRINT(KERN_ERR, "XSMP", fmt, ##arg)
+#define xve_printk(level, priv, format, arg...)                        \
+       printk(level "%s: " format,                             \
+               ((struct xve_dev_priv *) priv)->netdev->name,   \
+               ## arg)
+#define xve_warn(priv, format, arg...)                         \
+       xve_printk(KERN_WARNING, priv, format , ## arg)
+
+#define XSMP_INFO(fmt, arg...)                                 \
+       do {                                                    \
+               if (xve_debug_level & DEBUG_XSMP_INFO)          \
+                       PRINT(KERN_DEBUG, "XSMP", fmt , ## arg);\
+       } while (0)
+
+#define xve_test(fmt, arg...)                                  \
+       do {                                                    \
+               if (xve_debug_level & DEBUG_TEST_INFO)          \
+                       PRINT(KERN_DEBUG, "DEBUG", fmt , ## arg); \
+       } while (0)
+
+#define xve_dbg_data(priv, format, arg...)                     \
+       do {                                                    \
+               if (xve_debug_level & DEBUG_DATA_INFO)          \
+                       xve_printk(KERN_DEBUG, priv, format,    \
+                       ## arg);                                \
+       } while (0)
+#define xve_dbg_mcast(priv, format, arg...)                    \
+       do {                                                    \
+               if (xve_debug_level & DEBUG_MCAST_INFO)         \
+                       xve_printk(KERN_ERR, priv, format , ## arg); \
+       } while (0)
+#define xve_debug(level, priv, format, arg...)                         \
+       do {                                                            \
+               if (xve_debug_level & level) {                          \
+                       if (priv)                                       \
+                               printk("%s: " format,                   \
+                               ((struct xve_dev_priv *) priv)->netdev->name, \
+                               ## arg);                                \
+                       else                                            \
+                               printk("XVE: " format, ## arg);         \
+               }                                                       \
+       } while (0)
+
+static inline void update_cm_rx_rate(struct xve_cm_ctx *rx_qp, ulong bytes)
+{
+       rx_qp->stats.total_rx_bytes += bytes;
+       rx_qp->stats.rx_bytes += bytes;
+
+       /* Update the rate once every two seconds */
+       if ((jiffies - rx_qp->stats.rx_jiffies) > 2 * (HZ)) {
+               u32 r;
+
+               r = rx_qp->stats.rx_bytes /
+                   ((jiffies - rx_qp->stats.rx_jiffies) / (HZ));
+               r = (r / 1000000);      /* bytes/sec -> megabytes/sec */
+               /* megabytes/sec -> megabits/sec */
+               rx_qp->stats.rx_rate = (r * 8);
+               rx_qp->stats.rx_jiffies = jiffies;
+               rx_qp->stats.rx_bytes = 0;
+       }
+}
+
+static inline void update_cm_tx_rate(struct xve_cm_ctx *tx_qp, ulong bytes)
+{
+       tx_qp->stats.total_tx_bytes += bytes;
+       tx_qp->stats.tx_bytes += bytes;
+
+       /* Update the rate once every two seconds */
+       if ((jiffies - tx_qp->stats.tx_jiffies) > 2 * (HZ)) {
+               u32 r;
+
+               r = tx_qp->stats.tx_bytes /
+                   ((jiffies - tx_qp->stats.tx_jiffies) / (HZ));
+               r = (r / 1000000);      /* bytes/sec -> megabytes/sec */
+               /* megabytes/sec -> megabits/sec */
+               tx_qp->stats.tx_rate = (r * 8);
+               tx_qp->stats.tx_jiffies = jiffies;
+               tx_qp->stats.tx_bytes = 0;
+       }
+}
+
+static inline int xve_ud_need_sg(unsigned int ib_mtu)
+{
+       return XVE_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
+}
+
+static inline struct page *xve_alloc_page(gfp_t alloc_flags)
+{
+       xve_counters[XVE_NUM_PAGES_ALLOCED]++;
+       return alloc_page(alloc_flags);
+}
+
+static inline void xve_send_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+       struct net_device *netdev = priv->netdev;
+
+       if (netdev->features & NETIF_F_LRO)
+               lro_receive_skb(&priv->lro.lro_mgr, skb, NULL);
+       else
+               netif_receive_skb(skb);
+
+       netdev->last_rx = jiffies;
+       INC_RX_BYTE_STATS(priv, netdev, skb->len);
+       INC_RX_PKT_STATS(priv, netdev);
+}
+
+static inline struct sk_buff *xve_dev_alloc_skb(struct xve_dev_priv *priv,
+                                               unsigned int size)
+{
+
+       struct sk_buff *skb = dev_alloc_skb(size);
+
+       if (skb)
+               priv->counters[XVE_RX_SKB_ALLOC_COUNTER]++;
+       return skb;
+}
+
+static inline void xve_dev_kfree_skb_any(struct xve_dev_priv *priv,
+                                        struct sk_buff *skb, u8 type)
+{
+
+       if (type)
+               priv->counters[XVE_TX_SKB_FREE_COUNTER]++;
+       else
+               priv->counters[XVE_RX_SKB_FREE_COUNTER]++;
+
+       if (skb)
+               dev_kfree_skb_any(skb);
+
+}
+
+static inline int xve_cm_admin_enabled(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       return priv->cm_supported && test_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int xve_cm_enabled(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       return priv->cm_supported && test_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+}
+
+static inline int xve_cm_up(struct xve_path *path)
+{
+       if (xve_cm_single_qp)
+               return test_bit(XVE_FLAG_OPER_UP, &path->cm_ctx_common->flags);
+       else
+               return test_bit(XVE_FLAG_OPER_UP, &path->cm_ctx_tx->flags);
+}
+
+static inline struct xve_cm_ctx *xve_get_cmctx(struct xve_path *path)
+{
+       return path->cm_ctx_common;
+}
+
+static inline struct xve_cm_ctx *xve_cmtx_get(struct xve_path *path)
+{
+       if (xve_cm_single_qp)
+               return path->cm_ctx_common;
+       else
+               return path->cm_ctx_tx;
+}
+
+static inline struct xve_cm_ctx *xve_cmrx_get(struct xve_path *path)
+{
+       return path->cm_ctx_rx;
+}
+
+static inline void xve_cm_set(struct xve_path *path, struct xve_cm_ctx *tx)
+{
+       if (xve_cm_single_qp)
+               path->cm_ctx_common = tx;
+       else
+               path->cm_ctx_tx = tx;
+}
+
+static inline int xve_cm_has_srq(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       return !!priv->cm.srq;
+}
+
+static inline unsigned int xve_cm_max_mtu(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       return priv->cm.max_cm_mtu;
+}
+
+static inline void xve_put_ctx(struct xve_dev_priv *priv)
+{
+       atomic_dec(&priv->ref_cnt);
+}
+
+/* Adjust length of skb with fragments to match received data */
+static inline void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
+                                unsigned int length, struct sk_buff *toskb)
+{
+       int i, num_frags;
+       unsigned int size;
+
+       /* put header into skb */
+       size = min(length, hdr_space);
+       skb->tail += size;
+       skb->len += size;
+       length -= size;
+
+       num_frags = skb_shinfo(skb)->nr_frags;
+       for (i = 0; i < num_frags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               if (length == 0) {
+                       /* don't need this page */
+                       skb_fill_page_desc(toskb, i, skb_frag_page(frag),
+                                          0, PAGE_SIZE);
+                       --skb_shinfo(skb)->nr_frags;
+               } else {
+                       size = min_t(unsigned, length, (unsigned)PAGE_SIZE);
+
+                       frag->size = size;
+                       skb->data_len += size;
+                       skb->truesize += size;
+                       skb->len += size;
+                       length -= size;
+               }
+       }
+}
+
+/* functions */
+int xve_poll(struct napi_struct *napi, int budget);
+void xve_ib_completion(struct ib_cq *cq, void *dev_ptr);
+void xve_data_recv_handler(struct xve_dev_priv *priv);
+void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
+struct xve_ah *xve_create_ah(struct net_device *dev,
+                            struct ib_pd *pd, struct ib_ah_attr *attr);
+void xve_free_ah(struct kref *kref);
+static inline void xve_put_ah(struct xve_ah *ah)
+{
+       kref_put(&ah->ref, xve_free_ah);
+}
+
+int xve_open(struct net_device *dev);
+int xve_add_pkey_attr(struct net_device *dev);
+
+void xve_send(struct net_device *dev, struct sk_buff *skb,
+             struct xve_ah *address, u32 qpn);
+int poll_tx(struct xve_dev_priv *priv);
+int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state);
+void handle_carrier_state(struct xve_dev_priv *priv, char state);
+void queue_sm_work(struct xve_dev_priv *priv, int msecs);
+void queue_age_work(struct xve_dev_priv *priv, int msecs);
+
+void xve_mark_paths_invalid(struct net_device *dev);
+void xve_flush_paths(struct net_device *dev);
+void xve_flush_single_path(struct net_device *dev, struct xve_path *path);
+void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid);
+struct xve_dev_priv *xve_intf_alloc(const char *format);
+
+int xve_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+void xve_ib_dev_cleanup(struct net_device *dev);
+void xve_ib_dev_flush_light(struct work_struct *work);
+void xve_ib_dev_flush_normal(struct work_struct *work);
+void xve_ib_dev_flush_heavy(struct work_struct *work);
+void xve_pkey_event(struct work_struct *work);
+void xve_reap_ah(struct work_struct *work);
+void xve_cm_stale_task(struct work_struct *work);
+void xve_mcast_join_task(struct work_struct *work);
+void xve_mcast_leave_task(struct work_struct *work);
+void xve_mcast_restart_task(struct work_struct *work);
+void xve_cm_tx_start(struct work_struct *work);
+void xve_cm_tx_reap(struct work_struct *work);
+void xve_cm_rx_reap(struct work_struct *work);
+void xve_state_machine_work(struct work_struct *work);
+void xve_pkey_poll(struct work_struct *work);
+void xve_start_aging_work(struct work_struct *work);
+void xve_mcast_carrier_on_task(struct work_struct *work);
+
+int xve_ib_dev_open(struct net_device *dev);
+int xve_ib_dev_up(struct net_device *dev);
+int xve_ib_dev_down(struct net_device *dev, int flush);
+int xve_ib_dev_stop(struct net_device *dev, int flush);
+
+int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port);
+void xve_dev_cleanup(struct net_device *dev);
+void xve_fwt_entry_destroy(struct xve_dev_priv *priv,
+                          struct xve_fwt_entry *fwt_entry);
+void xve_remove_fwt_entry(struct xve_dev_priv *priv,
+                         struct xve_fwt_entry *fwt_entry);
+void xve_fwt_entry_free(struct xve_dev_priv *priv,
+                       struct xve_fwt_entry *fwt_entry);
+
+void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
+void xve_advert_mcast_join(struct xve_dev_priv *priv);
+int xve_mcast_start_thread(struct net_device *dev);
+int xve_mcast_stop_thread(struct net_device *dev, int flush);
+
+void xve_mcast_dev_down(struct net_device *dev);
+void xve_mcast_dev_flush(struct net_device *dev);
+int xve_mcast_attach(struct net_device *dev, u16 mlid,
+                    union ib_gid *mgid, int set_qkey);
+
+int xve_init_qp(struct net_device *dev);
+int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca);
+void xve_transport_dev_cleanup(struct net_device *dev);
+
+void xve_event(struct ib_event_handler *handler, struct ib_event *record);
+
+int xve_pkey_dev_delay_open(struct net_device *dev);
+void xve_drain_cq(struct net_device *dev);
+
+void xve_set_ethtool_ops(struct net_device *dev);
+int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca);
+int xve_modify_mtu(struct net_device *netdev, int new_mtu);
+
+struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
+                                  struct sk_buff *skb);
+struct sk_buff *xve_create_arp(struct xve_dev_priv *priv,
+                              struct sk_buff *org_skb);
+struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv,
+                              struct sk_buff *org_skb);
+int xve_send_hbeat(struct xve_dev_priv *xvep);
+void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id);
+
+/*CM */
+void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+                struct xve_cm_ctx *tx);
+int xve_cm_dev_open(struct net_device *dev);
+void xve_cm_dev_stop(struct net_device *dev);
+int xve_cm_dev_init(struct net_device *dev);
+void xve_cm_dev_cleanup(struct net_device *dev);
+struct xve_cm_ctx *xve_cm_create_tx(struct net_device *dev,
+                                   struct xve_path *path);
+void xve_cm_destroy_tx_deferred(struct xve_cm_ctx *tx);
+void xve_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
+                        unsigned int mtu);
+void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc);
+void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc);
+
+int xve_tables_init(void);
+void xve_fwt_init(struct xve_fwt_s *xve_fwt);
+void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
+                   union ib_gid *gid, u32 qpn, char *smac, u16 vlan);
+void xve_fwt_cleanup(struct xve_dev_priv *xvep);
+int xve_advert_process(struct xve_dev_priv *priv, struct sk_buff *skb);
+struct xve_fwt_entry *xve_fwt_lookup(struct xve_fwt_s *xve_fwt, char *mac,
+                                    u16 vlan, int refresh);
+void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt,
+                    struct xve_fwt_entry *fwt_entry);
+struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val);
+bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
+                        struct xve_fwt_entry *fwt_entry);
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
+                         int do_lock);
+int xve_aging_task_machine(struct xve_dev_priv *priv);
+void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb);
+void xve_tables_exit(void);
+void xve_remove_one(struct xve_dev_priv *priv);
+struct xve_path *__path_find(struct net_device *netdev, void *gid);
+extern int xve_add_proc_entry(struct xve_dev_priv *vp);
+void xve_remove_proc_entry(struct xve_dev_priv *vp);
+extern int xve_change_rxbatch(struct xve_dev_priv *xvep, int flag);
+
+static inline int xve_continue_unload(void)
+{
+       return !(xve_debug_level & DEBUG_CONTINUE_UNLOAD);
+}
+
+static inline int xve_get_misc_info(void)
+{
+       return xve_debug_level & DEBUG_MISC_INFO;
+}
+
+static inline int xg_vlan_tx_tag_present(struct sk_buff *skb)
+{
+       struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+       return veth->h_vlan_proto == htons(ETH_P_8021Q);
+}
+
+static inline u16 xg_vlan_get_rxtag(struct sk_buff *skb)
+{
+       struct ethhdr *eh = (struct ethhdr *)(skb->data);
+       u16 vlan_tci = 0xFFFF;
+
+       if (eh->h_proto == htons(ETH_P_8021Q)) {
+               struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+               vlan_tci = be16_to_cpu(veth->h_vlan_TCI);
+       } else {
+               vlan_tci = 0;
+       }
+
+       return vlan_tci;
+
+}
+
+/*
+ * xve_calc_speed - calculate effective port speed
+ *
+ * @priv: device private data
+ *
+ * RETURNS: per-lane link speed multiplied by port width, in Mb/s
+ */
+static inline unsigned int xve_calc_speed(struct xve_dev_priv *priv)
+{
+       struct ib_port_attr *attr;
+       unsigned int link_speed;
+       int port_width;
+
+       if (!priv)
+               return 0;
+
+       attr = &priv->port_attr;
+
+       switch (attr->active_speed) {
+       case 0x1:
+               link_speed = SPEED_SDR;
+               break;
+       case 0x2:
+               link_speed = SPEED_DDR;
+               break;
+       case 0x4:
+               link_speed = SPEED_QDR;
+               break;
+       case 0x8:
+               link_speed = SPEED_FDR10;
+               break;
+       case 0x10:
+               link_speed = SPEED_FDR;
+               break;
+       case 0x20:
+               link_speed = SPEED_EDR;
+               break;
+       default:
+               link_speed = 0;
+       }
+
+       port_width = ib_width_enum_to_int(attr->active_width);
+       if (port_width < 0)
+               port_width = 0;
+
+       return link_speed * port_width;
+}
+
+/* Work queue functions */
+static inline void xve_queue_work(struct xve_dev_priv *priv, int work_type)
+{
+       struct xve_work *work;
+
+       if (test_bit(XVE_DELETING, &priv->flags)) {
+               priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+               return;
+       }
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return;
+       work->priv = priv;
+
+       switch (work_type) {
+       case XVE_WQ_START_CMTXSTART:
+               INIT_WORK(&work->work, xve_cm_tx_start);
+               break;
+       case XVE_WQ_START_CMTXREAP:
+               INIT_WORK(&work->work, xve_cm_tx_reap);
+               break;
+       case XVE_WQ_START_CMRXREAP:
+               INIT_WORK(&work->work, xve_cm_rx_reap);
+               break;
+       case XVE_WQ_START_MCASTON:
+               INIT_WORK(&work->work, xve_mcast_carrier_on_task);
+               break;
+       case XVE_WQ_START_MCASTRESTART:
+               INIT_WORK(&work->work, xve_mcast_restart_task);
+               break;
+       case XVE_WQ_START_FLUSHLIGHT:
+               INIT_WORK(&work->work, xve_ib_dev_flush_light);
+               break;
+       case XVE_WQ_START_FLUSHNORMAL:
+               INIT_WORK(&work->work, xve_ib_dev_flush_normal);
+               break;
+       case XVE_WQ_START_FLUSHHEAVY:
+               INIT_WORK(&work->work, xve_ib_dev_flush_heavy);
+               break;
+       default:
+               priv->misc_counters[XVE_WQ_INVALID]++;
+               kfree(work);
+               work = NULL;
+               break;
+       }
+
+       if (!work)
+               return;
+
+       if (queue_work(xve_taskqueue, &work->work) != 0) {
+               atomic_inc(&priv->ref_cnt);
+               priv->misc_counters[work_type]++;
+       } else {
+               priv->misc_counters[XVE_WQ_FAILED]++;
+               priv->work_queue_failed = work_type;
+       }
+
+}
+
+static inline void xve_queue_dwork(struct xve_dev_priv *priv, int work_type,
+                                  u64 time)
+{
+
+       struct xve_work *work;
+
+       if (test_bit(XVE_DELETING, &priv->flags)) {
+               priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+               return;
+       }
+
+       work = kmalloc(sizeof(*work), GFP_ATOMIC);
+       if (!work)
+               return;
+       work->priv = priv;
+
+       switch (work_type) {
+       case XVE_WQ_START_PKEYPOLL:
+               INIT_DELAYED_WORK(&work->dwork, xve_pkey_poll);
+               break;
+       case XVE_WQ_START_AHREAP:
+               INIT_DELAYED_WORK(&work->dwork, xve_reap_ah);
+               break;
+       case XVE_WQ_START_FWT_AGING:
+               INIT_DELAYED_WORK(&work->dwork, xve_start_aging_work);
+               break;
+
+       default:
+               priv->misc_counters[XVE_WQ_INVALID]++;
+               kfree(work);
+               work = NULL;
+               break;
+       }
+
+       if (!work)
+               return;
+
+       if (queue_delayed_work(xve_taskqueue, &work->dwork, time) != 0) {
+               atomic_inc(&priv->ref_cnt);
+               priv->misc_counters[work_type]++;
+       } else {
+               priv->misc_counters[XVE_WQ_FAILED]++;
+               priv->work_queue_failed = work_type;
+       }
+
+}
+
+static inline void xve_queue_complete_work(struct xve_dev_priv *priv,
+                                          int work_type, u64 time)
+{
+       if (test_bit(XVE_DELETING, &priv->flags)) {
+               priv->misc_counters[XVE_WQ_DONT_SCHEDULE]++;
+               return;
+       }
+
+       switch (work_type) {
+       case XVE_WQ_START_CMSTALE:
+               queue_delayed_work(xve_taskqueue, &priv->stale_task, time);
+               break;
+       case XVE_WQ_START_MCASTJOIN:
+               queue_delayed_work(xve_taskqueue, &priv->mcast_join_task, time);
+               break;
+       case XVE_WQ_START_MCASTLEAVE:
+               queue_delayed_work(xve_taskqueue, &priv->mcast_leave_task,
+                                  time);
+               break;
+       default:
+               priv->misc_counters[XVE_WQ_INVALID]++;
+               break;
+       }
+
+       priv->misc_counters[work_type]++;
+
+}
+
+static inline struct xve_dev_priv *xve_get_wqctx(struct work_struct *work,
+                                                int work_type, u8 code)
+{
+       struct xve_work *xwork;
+       struct xve_dev_priv *priv;
+
+/*
+ * code == 2: the delayed work is embedded in struct xve_dev_priv
+ * (queued via xve_queue_complete_work()), code == 1: delayed work
+ * inside an allocated struct xve_work, code == 0: plain work_struct
+ * inside an allocated struct xve_work.
+ */
+       if (code == 2) {
+               switch (work_type) {
+               case XVE_WQ_FINISH_CMSTALE:
+                       priv =
+                           container_of(work, struct xve_dev_priv,
+                                        stale_task.work);
+                       break;
+               case XVE_WQ_FINISH_MCASTJOIN:
+                       priv =
+                           container_of(work, struct xve_dev_priv,
+                                        mcast_join_task.work);
+                       break;
+               case XVE_WQ_FINISH_MCASTLEAVE:
+                       priv =
+                           container_of(work, struct xve_dev_priv,
+                                        mcast_leave_task.work);
+                       break;
+               default:
+                       return NULL;
+               }
+       } else {
+               if (code == 1)
+                       xwork = container_of(work, struct xve_work, dwork.work);
+               else
+                       xwork = container_of(work, struct xve_work, work);
+               priv = xwork->priv;
+               kfree(xwork);
+       }
+       priv->misc_counters[work_type]++;
+       return priv;
+}
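+
+/*
+ * Illustrative sketch (hypothetical handler, for documentation only):
+ * a plain work item queued via xve_queue_work() retrieves its context
+ * with code 0 -- which also frees the struct xve_work -- and drops the
+ * reference taken when the work was queued.
+ */
+static inline void example_wq_handler(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHNORMAL, 0);
+
+       /* ... perform the actual flush ... */
+       xve_put_ctx(priv);      /* drop ref taken in xve_queue_work() */
+}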
+
+/* DEBUG FUNCTIONS */
+static inline void dbg_dump_raw_pkt(unsigned char *buff, int length, char *name)
+{
+       int i;
+       int tmp_len;
+       u32 *data_ptr;
+       unsigned char *tmp_data_ptr;
+
+       if (!(xve_debug_level & DEBUG_TEST_INFO))
+               return;
+
+       pr_info("%s. Packet length is %d\n", name, length);
+       tmp_len = (length >> 2) + 1;
+       data_ptr = (u32 *) buff;
+       for (i = 0; i < tmp_len; i++) {
+               tmp_data_ptr = (unsigned char *)&data_ptr[i];
+               pr_info("%02x %02x %02x %02x\n",
+                       tmp_data_ptr[0], tmp_data_ptr[1],
+                       tmp_data_ptr[2], tmp_data_ptr[3]);
+       }
+}
+
+static inline void dbg_dump_skb(struct sk_buff *skb)
+{
+       char prefix[32];
+
+       if (!(xve_debug_level & DEBUG_TEST_INFO))
+               return;
+       snprintf(prefix, 32, "%s:skb-%p", skb->dev ? skb->dev->name : "NULL ",
+                skb);
+
+       pr_info("[%s] --- skb dump ---\n", prefix);
+       pr_info("[%s] len     : %d\n", prefix, skb->len);
+       pr_info("[%s] truesize: %d\n", prefix, skb->truesize);
+       pr_info("[%s] data_len: %d\n", prefix, skb->data_len);
+       pr_info("[%s] nr_frags: %d\n", prefix, skb_shinfo(skb)->nr_frags);
+       pr_info("[%s] data    : %p\n", prefix, (void *)skb->data);
+       pr_info("[%s] head    : %p\n", prefix, (void *)skb->head);
+       pr_info("\n");
+
+}
+
+static inline void dumppkt(unsigned char *pkt, unsigned short len, char *name)
+{
+       int i = 0;
+       unsigned char *p = (unsigned char *)pkt;
+       char line[64] = { 0 };
+       char *cp = line;
+       char filter[] = "0123456789abcdef";
+       int printed_line = 0;
+
+       if (!(xve_debug_level & DEBUG_DUMP_PKTS))
+               return;
+
+       pr_info("%s DumpPacket of %d\n", name, len);
+
+       for (i = 0; i < len; i++) {
+               if ((i != 0) && (i % 8 == 0)) {
+                       pr_info("%s\n", line);
+                       memset(line, 0, sizeof(line));
+                       cp = line;
+                       printed_line = 1;
+               } else {
+                       printed_line = 0;
+               }
+
+               if (*p > 0x0f)
+                       *cp++ = filter[*p >> 4];
+               else
+                       *cp++ = filter[0];
+
+               *cp++ = filter[*p++ & 0xf];
+               *cp++ = ':';
+               if (((len - i) == 1) && !printed_line) {
+                       pr_info("%s\n", line);
+                       memset(line, 0, sizeof(line));
+                       cp = line;
+               }
+       }
+       *--cp = 0;
+}
+
+static inline void print_mgid(char *bcast_mgid_token, int debug)
+{
+       if (!debug && !(xve_debug_level & DEBUG_TEST_INFO))
+               return;
+       pr_info("MGID %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x\n",
+               bcast_mgid_token[0] & 0xff, bcast_mgid_token[1] & 0xff,
+               bcast_mgid_token[2] & 0xff, bcast_mgid_token[3] & 0xff,
+               bcast_mgid_token[4] & 0xff, bcast_mgid_token[5] & 0xff,
+               bcast_mgid_token[6] & 0xff, bcast_mgid_token[7] & 0xff,
+               bcast_mgid_token[8] & 0xff, bcast_mgid_token[9] & 0xff,
+               bcast_mgid_token[10] & 0xff, bcast_mgid_token[11] & 0xff,
+               bcast_mgid_token[12] & 0xff, bcast_mgid_token[13] & 0xff,
+               bcast_mgid_token[14] & 0xff, bcast_mgid_token[15] & 0xff);
+}
+
+static inline void print_mgid_buf(char buffer[], char *bcast_mgid_token)
+{
+       sprintf(buffer, "%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x   ",
+               bcast_mgid_token[0] & 0xff, bcast_mgid_token[1] & 0xff,
+               bcast_mgid_token[2] & 0xff, bcast_mgid_token[3] & 0xff,
+               bcast_mgid_token[4] & 0xff, bcast_mgid_token[5] & 0xff,
+               bcast_mgid_token[6] & 0xff, bcast_mgid_token[7] & 0xff,
+               bcast_mgid_token[8] & 0xff, bcast_mgid_token[9] & 0xff,
+               bcast_mgid_token[10] & 0xff, bcast_mgid_token[11] & 0xff,
+               bcast_mgid_token[12] & 0xff, bcast_mgid_token[13] & 0xff,
+               bcast_mgid_token[14] & 0xff, bcast_mgid_token[15] & 0xff);
+}
+
+#endif /* _XVE_H */
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_cm.c b/drivers/infiniband/ulp/xsigo/xve/xve_cm.c
new file mode 100644 (file)
index 0000000..7c68f8f
--- /dev/null
@@ -0,0 +1,1415 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int retry_count = 1;
+module_param_named(retry_count, retry_count, int, 0644);
+MODULE_PARM_DESC(retry_count, "Max number of IB retries");
+
+static int rnr_retry_count = 4;
+module_param_named(rnr_retry_count, rnr_retry_count, int, 0644);
+MODULE_PARM_DESC(rnr_retry_count, "Max number of RNR retries");
+
+#define XVE_CM_IETF_ID 0x1000000000000000ULL
+
+#define XVE_CM_RX_UPDATE_TIME (256 * HZ)
+#define XVE_CM_RX_TIMEOUT     (2 * 256 * HZ)
+#define XVE_CM_RX_DELAY       (3 * 256 * HZ)
+#define XVE_CM_RX_UPDATE_MASK (0x3)
+
+static struct ib_qp_attr xve_cm_err_attr = {
+       .qp_state = IB_QPS_ERR
+};
+
+#define XVE_CM_RX_DRAIN_WRID 0xffffffff
+
+static struct ib_send_wr xve_cm_rx_drain_wr = {
+       .wr_id = XVE_CM_RX_DRAIN_WRID,
+       .opcode = IB_WR_SEND,
+};
+
+static int xve_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
+static void __xve_cm_tx_reap(struct xve_dev_priv *priv);
+
+static void xve_cm_dma_unmap_rx(struct xve_dev_priv *priv, int frags,
+                               u64 mapping[XVE_CM_RX_SG])
+{
+       int i;
+
+       ib_dma_unmap_single(priv->ca, mapping[0], XVE_CM_HEAD_SIZE,
+                           DMA_FROM_DEVICE);
+
+       for (i = 0; i < frags; ++i) {
+               xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+               ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE,
+                                   DMA_FROM_DEVICE);
+       }
+}
+
+static int xve_cm_post_receive_srq(struct net_device *netdev, int id)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       struct ib_recv_wr *bad_wr;
+       int i, ret;
+
+       priv->cm.rx_wr.wr_id = id | XVE_OP_CM | XVE_OP_RECV;
+
+       for (i = 0; i < priv->cm.num_frags; ++i)
+               priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+       ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+       if (unlikely(ret)) {
+               xve_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+               xve_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
+                                   priv->cm.srq_ring[id].mapping);
+               dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+               priv->cm.srq_ring[id].skb = NULL;
+       }
+
+       return ret;
+}
+
+static struct sk_buff *xve_cm_alloc_rx_skb(struct net_device *dev,
+                                          struct xve_cm_buf *rx_ring,
+                                          int id, int frags,
+                                          u64 mapping[XVE_CM_RX_SG])
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct sk_buff *skb;
+       int i;
+
+       skb = xve_dev_alloc_skb(priv, XVE_CM_HEAD_SIZE + NET_IP_ALIGN);
+       if (unlikely(!skb)) {
+               xve_warn(priv, "%s Failed to allocate skb\n", __func__);
+               return NULL;
+       }
+
+       skb_reserve(skb, NET_IP_ALIGN);
+
+       mapping[0] = ib_dma_map_single(priv->ca, skb->data, XVE_CM_HEAD_SIZE,
+                                      DMA_FROM_DEVICE);
+       if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+               xve_warn(priv, "%s Failed to Map skb\n", __func__);
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+       for (i = 0; i < frags; i++) {
+               gfp_t alloc_flags = GFP_ATOMIC;
+               struct page *page = xve_alloc_page(alloc_flags);
+
+               if (!page) {
+                       xve_warn(priv,
+                                "%s Failed to allocate flags %x page state %d\n",
+                                __func__, alloc_flags,
+                                test_bit(XVE_OPER_UP, &priv->state));
+                       goto partial_error;
+               }
+               skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
+
+               mapping[i + 1] =
+                   ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page.p,
+                                   0, PAGE_SIZE, DMA_FROM_DEVICE);
+               if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) {
+                       xve_warn(priv, "%s Failed to Map page\n", __func__);
+                       goto partial_error;
+               }
+       }
+
+       rx_ring[id].skb = skb;
+       return skb;
+
+partial_error:
+
+       ib_dma_unmap_single(priv->ca, mapping[0], XVE_CM_HEAD_SIZE,
+                           DMA_FROM_DEVICE);
+
+       for (; i > 0; --i) {
+               xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+               ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE,
+                                   DMA_FROM_DEVICE);
+       }
+
+       dev_kfree_skb_any(skb);
+       return NULL;
+}
+
+static void xve_cm_free_rx_ring(struct net_device *dev,
+                               struct xve_cm_buf *rx_ring)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = 0; i < xve_recvq_size; ++i) {
+               if (rx_ring[i].skb) {
+                       xve_cm_dma_unmap_rx(priv, XVE_CM_RX_SG - 1,
+                                           rx_ring[i].mapping);
+                       xve_dev_kfree_skb_any(priv, rx_ring[i].skb, 0);
+               }
+       }
+       vfree(rx_ring);
+}
+
+static void xve_cm_start_rx_drain(struct xve_dev_priv *priv)
+{
+       struct ib_send_wr *bad_wr;
+       struct xve_cm_ctx *p;
+
+       /* We only reserved 1 extra slot in CQ for drain WRs, so
+        * make sure we have at most 1 outstanding WR. */
+       if (list_empty(&priv->cm.rx_flush_list) ||
+           !list_empty(&priv->cm.rx_drain_list))
+               return;
+
+       /*
+        * QPs on flush list are error state.  This way, a "flush
+        * error" WC will be immediately generated for each WR we post.
+        */
+       p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
+       if (ib_post_send(p->qp, &xve_cm_rx_drain_wr, &bad_wr))
+               xve_warn(priv, "failed to post drain wr\n");
+
+       list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
+}
+
+static void xve_cm_rx_event_handler(struct ib_event *event, void *ctx)
+{
+       struct xve_cm_ctx *p = ctx;
+       struct xve_dev_priv *priv = netdev_priv(p->netdev);
+       unsigned long flags;
+
+       if (event->event != IB_EVENT_QP_LAST_WQE_REACHED)
+               return;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       list_move(&p->list, &priv->cm.rx_flush_list);
+       p->state = XVE_CM_RX_FLUSH;
+       xve_cm_start_rx_drain(priv);
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static struct ib_qp *xve_cm_create_rx_qp(struct net_device *dev,
+                                        struct xve_cm_ctx *p)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_init_attr attr = {
+               .event_handler = xve_cm_rx_event_handler,
+               .send_cq = priv->recv_cq,       /* For drain WR */
+               .recv_cq = priv->recv_cq,
+               .srq = priv->cm.srq,
+               .cap.max_send_wr = 1,   /* For drain WR */
+               .cap.max_send_sge = 1,  /* 0 Seems not to work */
+               .sq_sig_type = IB_SIGNAL_ALL_WR,
+               .qp_type = IB_QPT_RC,
+               .qp_context = p,
+       };
+
+       return ib_create_qp(priv->pd, &attr);
+}
+
+static int xve_cm_modify_rx_qp(struct net_device *dev,
+                              struct ib_cm_id *cm_id, struct ib_qp *qp,
+                              unsigned psn)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_attr qp_attr;
+       int qp_attr_mask, ret;
+
+       qp_attr.qp_state = IB_QPS_INIT;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
+               return ret;
+       }
+       ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to INIT: %d\n", ret);
+               return ret;
+       }
+       qp_attr.qp_state = IB_QPS_RTR;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+               return ret;
+       }
+       qp_attr.rq_psn = psn;
+       ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+               return ret;
+       }
+
+       /*
+        * Current Mellanox HCA firmware won't generate completions
+        * with error for drain WRs unless the QP has been moved to
+        * RTS first. This work-around leaves a window where a QP has
+        * moved to error asynchronously, but this will eventually get
+        * fixed in firmware, so let's not error out if modify QP
+        * fails.
+        */
+       qp_attr.qp_state = IB_QPS_RTS;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+               return 0;
+       }
+       ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+               return 0;
+       }
+
+       return 0;
+}
+
+static void xve_cm_init_rx_wr(struct net_device *dev,
+                             struct ib_recv_wr *wr, struct ib_sge *sge)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = 0; i < priv->cm.num_frags; ++i)
+               sge[i].lkey = priv->mr->lkey;
+
+       sge[0].length = XVE_CM_HEAD_SIZE;
+       for (i = 1; i < priv->cm.num_frags; ++i)
+               sge[i].length = PAGE_SIZE;
+
+       wr->next = NULL;
+       wr->sg_list = sge;
+       wr->num_sge = priv->cm.num_frags;
+}
+
+static int xve_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
+                          struct ib_qp *qp, struct ib_cm_req_event_param *req,
+                          unsigned psn)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_data data = { };
+       struct ib_cm_rep_param rep = { };
+
+       data.qpn = cpu_to_be32(priv->qp->qp_num);
+       data.mtu = cpu_to_be32(XVE_CM_BUF_SIZE);
+
+       rep.private_data = &data;
+       rep.private_data_len = sizeof(data);
+       rep.flow_control = 0;
+       rep.rnr_retry_count = req->rnr_retry_count;
+       rep.srq = xve_cm_has_srq(dev);
+       rep.qp_num = qp->qp_num;
+       rep.starting_psn = psn;
+       return ib_send_cm_rep(cm_id, &rep);
+}
+
+static int xve_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+       struct net_device *dev = cm_id->context;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_ctx *p;
+       unsigned psn;
+       int ret;
+       union ib_gid *dgid = &event->param.req_rcvd.primary_path->dgid;
+       struct xve_path *path;
+
+       xve_debug(DEBUG_CM_INFO, priv, "%s REQ arrived\n", __func__);
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p)
+               return -ENOMEM;
+       p->netdev = dev;
+       strncpy(p->version, XSIGO_LOCAL_VERSION, 60);
+       p->direction = XVE_CM_ESTD_RX;
+       p->id = cm_id;
+       cm_id->context = p;
+       p->state = XVE_CM_RX_LIVE;
+       p->jiffies = jiffies;
+       INIT_LIST_HEAD(&p->list);
+       /*
+        * Save the remote GID
+        */
+       memcpy(&p->dgid, dgid, sizeof(union ib_gid));
+
+       p->qp = xve_cm_create_rx_qp(dev, p);
+       if (IS_ERR(p->qp)) {
+               ret = PTR_ERR(p->qp);
+               goto err_qp;
+       }
+
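+       /* Start the receive queue at a random 24-bit PSN */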
+       psn = xve_random32(priv);
+       ret = xve_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
+       if (ret)
+               goto err_modify;
+
+       spin_lock_irq(&priv->lock);
+       /* Find path and insert rx_qp */
+       path = __path_find(dev, dgid->raw);
+       if (path) {
+               char print[512];
+
+               print_mgid_buf(print, (char *)dgid->raw);
+               pr_info("XVE: %s  Adding Rx QP to the path %s\n",
+                       priv->xve_name, print);
+               path->cm_ctx_rx = p;
+       } else {
+               priv->counters[XVE_PATH_NOT_SETUP]++;
+       }
+
+       xve_queue_complete_work(priv, XVE_WQ_START_CMSTALE, XVE_CM_RX_DELAY);
+       /* Add this entry to passive ids list head, but do not re-add it
+        * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
+       p->jiffies = jiffies;
+       if (p->state == XVE_CM_RX_LIVE)
+               list_move(&p->list, &priv->cm.passive_ids);
+       spin_unlock_irq(&priv->lock);
+
+       ret = xve_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
+       if (ret) {
+               xve_warn(priv, "failed to send REP: %d\n", ret);
+               if (ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE))
+                       xve_warn(priv, "unable to move qp to error state\n");
+       }
+       return 0;
+
+err_modify:
+       ib_destroy_qp(p->qp);
+err_qp:
+       kfree(p);
+       return ret;
+}
+
+static int xve_cm_rx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+       struct xve_cm_ctx *p;
+       struct xve_dev_priv *priv;
+
+       switch (event->event) {
+       case IB_CM_REQ_RECEIVED:
+               return xve_cm_req_handler(cm_id, event);
+       case IB_CM_DREQ_RECEIVED:
+               ib_send_cm_drep(cm_id, NULL, 0);
+               /* Fall through */
+       case IB_CM_REJ_RECEIVED:
+               p = cm_id->context;
+               priv = netdev_priv(p->netdev);
+               if (ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE))
+                       xve_warn(priv, "unable to move qp to error state\n");
+               /* Fall through */
+       default:
+               return 0;
+       }
+}
+
+static void xve_cm_free_rx_reap_list(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_ctx *rx, *n;
+       LIST_HEAD(list);
+
+       spin_lock_irq(&priv->lock);
+       list_splice_init(&priv->cm.rx_reap_list, &list);
+       spin_unlock_irq(&priv->lock);
+
+       list_for_each_entry_safe(rx, n, &list, list) {
+               ib_destroy_cm_id(rx->id);
+               ib_destroy_qp(rx->qp);
+               kfree(rx);
+       }
+}
+
+void xve_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_buf *rx_ring;
+       unsigned int wr_id = wc->wr_id & ~(XVE_OP_CM | XVE_OP_RECV);
+       struct sk_buff *skb, *newskb = NULL;
+       struct xve_cm_ctx *p;
+       unsigned long flags;
+       u64 mapping[XVE_CM_RX_SG];
+       int frags;
+       struct sk_buff *small_skb;
+       u16 vlan;
+
+       xve_dbg_data(priv, "cm recv completion: id %d, status: %d\n",
+                    wr_id, wc->status);
+
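+       /*
+        * wr_ids past the ring size are special: either the RX-drain
+        * sentinel (XVE_CM_RX_DRAIN_WRID) or a stale completion.
+        */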
+       if (unlikely(wr_id >= xve_recvq_size)) {
+               if (wr_id ==
+                   (XVE_CM_RX_DRAIN_WRID & ~(XVE_OP_CM | XVE_OP_RECV))) {
+                       spin_lock_irqsave(&priv->lock, flags);
+                       list_splice_init(&priv->cm.rx_drain_list,
+                                        &priv->cm.rx_reap_list);
+                       xve_cm_start_rx_drain(priv);
+                       xve_queue_work(priv, XVE_WQ_START_CMRXREAP);
+                       spin_unlock_irqrestore(&priv->lock, flags);
+               } else
+                       xve_warn(priv,
+                                "cm recv completion event with wrid %d (> %d)\n",
+                                wr_id, xve_recvq_size);
+               return;
+       }
+
+       p = wc->qp->qp_context;
+       if (p == NULL) {
+               pr_err("%s ERROR: CM connection[RX] context is NULL [xve %s]\n",
+                      __func__, priv->xve_name);
+               return;
+       }
+
+       if (p->direction != XVE_CM_ESTD_RX) {
+               pr_err("%s ERROR: CM connection[RX] not established [xve %s], direction %d\n",
+                      __func__, priv->xve_name, p->direction);
+               return;
+       }
+
+       rx_ring = priv->cm.srq_ring;
+       skb = rx_ring[wr_id].skb;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               if (!test_bit(XVE_DELETING, &priv->state))
+                       pr_err("%s: cm recv error (status=%d, wrid=%d vend_err %x)\n",
+                              priv->xve_name, wc->status, wr_id,
+                              wc->vendor_err);
+               INC_RX_DROP_STATS(priv, dev);
+               goto repost;
+       }
+
+       if (unlikely(!(wr_id & XVE_CM_RX_UPDATE_MASK))) {
+               if (time_after_eq(jiffies,
+                                 p->jiffies + XVE_CM_RX_UPDATE_TIME)) {
+                       spin_lock_irqsave(&priv->lock, flags);
+                       p->jiffies = jiffies;
+                       /* Move this entry to list head, but do not re-add it
+                        * if it has been moved out of list. */
+                       if (p->state == XVE_CM_RX_LIVE)
+                               list_move(&p->list, &priv->cm.passive_ids);
+                       spin_unlock_irqrestore(&priv->lock, flags);
+               }
+       }
+
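+       /*
+        * Copybreak: copy small packets into a fresh skb so the large
+        * SRQ buffer can be reposted untouched.
+        */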
+       if (wc->byte_len < XVE_CM_COPYBREAK) {
+               int dlen = wc->byte_len;
+
+               small_skb = dev_alloc_skb(dlen + NET_IP_ALIGN);
+               if (small_skb) {
+                       skb_reserve(small_skb, NET_IP_ALIGN);
+                       ib_dma_sync_single_for_cpu(priv->ca,
+                                                  rx_ring[wr_id].mapping[0],
+                                                  dlen, DMA_FROM_DEVICE);
+                       skb_copy_from_linear_data(skb, small_skb->data, dlen);
+                       ib_dma_sync_single_for_device(priv->ca,
+                                                     rx_ring[wr_id].mapping[0],
+                                                     dlen, DMA_FROM_DEVICE);
+                       skb_put(small_skb, dlen);
+                       skb = small_skb;
+                       priv->counters[XVE_RX_SMALLSKB_ALLOC_COUNTER]++;
+                       goto copied;
+               }
+       }
+
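+       /* Page fragments needed beyond the head buffer for this packet */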
+       frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
+                                             (unsigned)XVE_CM_HEAD_SIZE)) /
+           PAGE_SIZE;
+
+       newskb = xve_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping);
+       if (unlikely(!newskb)) {
+               /*
+                * If we can't allocate a new RX buffer, dump
+                * this packet and reuse the old buffer.
+                */
+               xve_dbg_data(priv,
+                            "%s failed to allocate rc receive buffer %d\n",
+                            __func__, wr_id);
+               INC_RX_DROP_STATS(priv, dev);
+               goto repost;
+       }
+
+       xve_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
+       memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
+
+       xve_dbg_data(priv, "%s received %d bytes, SLID 0x%04x\n", __func__,
+                    wc->byte_len, wc->slid);
+
+       skb_put_frags(skb, XVE_CM_HEAD_SIZE, wc->byte_len, newskb);
+copied:
+
+       vlan = xg_vlan_get_rxtag(skb);
+       xve_fwt_insert(priv, p, &p->dgid, 0, skb->data + ETH_ALEN, vlan);
+       xve_prepare_skb(priv, skb);
+
+       xve_dbg_data(priv,
+                       "%s Received RC packet %02x %02x %02x %02x %02x %02x",
+                       __func__, skb->data[0], skb->data[1], skb->data[2],
+                       skb->data[3], skb->data[4], skb->data[5]);
+       xve_dbg_data(priv,
+                       "%02x %02x %02x %02x %02x %02x proto %x\n",
+                       skb->data[6], skb->data[7], skb->data[8], skb->data[9],
+                       skb->data[10], skb->data[11],
+                       skb->protocol);
+       update_cm_rx_rate(p, skb->len);
+       xve_send_skb(priv, skb);
+repost:
+       if (unlikely(xve_cm_post_receive_srq(dev, wr_id)))
+               xve_warn(priv, "xve_cm_post_receive_srq failed for buf %d\n",
+                        wr_id);
+}
+
+static inline int post_send(struct xve_dev_priv *priv,
+                           struct xve_cm_ctx *tx,
+                           unsigned int wr_id, u64 addr, int len)
+{
+       struct ib_send_wr *bad_wr;
+
+       priv->tx_sge[0].addr = addr;
+       priv->tx_sge[0].length = len;
+
+       priv->tx_wr.num_sge = 1;
+       priv->tx_wr.wr_id = wr_id | XVE_OP_CM;
+
+       return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+}
+
+static void xve_cm_tx_buf_free(struct xve_dev_priv *priv,
+                              struct xve_cm_buf *tx_req)
+{
+       if ((tx_req->skb == NULL) || (tx_req->mapping[0] == 0))
+               xve_debug(DEBUG_DATA_INFO, priv,
+                         "%s tx_req %p not mapped: skb %p mapping 0x%llx\n",
+                         __func__, tx_req, tx_req->skb, tx_req->mapping[0]);
+       else
+               ib_dma_unmap_single(priv->ca, tx_req->mapping[0],
+                                   tx_req->skb->len, DMA_TO_DEVICE);
+
+       xve_dev_kfree_skb_any(priv, tx_req->skb, 1);
+       memset(tx_req, 0, sizeof(struct xve_cm_buf));
+}
+
+void xve_cm_send(struct net_device *dev, struct sk_buff *skb,
+                struct xve_cm_ctx *tx)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_buf *tx_req;
+       u64 addr;
+
+       if (unlikely(skb->len > tx->mtu + VLAN_ETH_HLEN)) {
+               xve_warn(priv,
+                        "packet len %d (> %d) too long to send, dropping\n",
+                        skb->len, tx->mtu + VLAN_ETH_HLEN);
+               INC_TX_DROP_STATS(priv, dev);
+               INC_TX_ERROR_STATS(priv, dev);
+               dev_kfree_skb_any(skb);
+               return;
+       }
+
+       xve_dbg_data(priv,
+                    "sending packet: head 0x%x length %d connection 0x%x\n",
+                    tx->tx_head, skb->len, tx->qp->qp_num);
+
+       /*
+        * We put the skb into the tx_ring _before_ we call post_send()
+        * because it's entirely possible that the completion handler will
+        * run before we execute anything after the post_send().  That
+        * means we have to make sure everything is properly recorded and
+        * our state is consistent before we call post_send().
+        */
+       tx_req = &tx->tx_ring[tx->tx_head & (xve_sendq_size - 1)];
+       tx_req->skb = skb;
+       addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
+       if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+               INC_TX_ERROR_STATS(priv, dev);
+               dev_kfree_skb_any(skb);
+               memset(tx_req, 0, sizeof(struct xve_cm_buf));
+               return;
+       }
+       tx_req->mapping[0] = addr;
+
+       if (unlikely(post_send(priv, tx, tx->tx_head & (xve_sendq_size - 1),
+                              addr, skb->len))) {
+               xve_warn(priv, "post_send failed\n");
+               INC_TX_ERROR_STATS(priv, dev);
+               xve_cm_tx_buf_free(priv, tx_req);
+       } else {
+               ++tx->tx_head;
+               if (++priv->tx_outstanding == xve_sendq_size) {
+                       xve_dbg_data(priv,
+                                    "TX ring 0x%x full, stopping kernel net queue\n",
+                                    tx->qp->qp_num);
+                       if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+                               xve_warn(priv,
+                                        "request notify on send CQ failed\n");
+                       priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+                       priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+                       netif_stop_queue(dev);
+               }
+       }
+       priv->send_hbeat_flag = 0;
+}
+
+void xve_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_ctx *tx = wc->qp->qp_context;
+       unsigned int wr_id = wc->wr_id & ~XVE_OP_CM;
+       struct xve_cm_buf *tx_req;
+
+       xve_dbg_data(priv, "cm send completion: id %d, status: %d\n",
+                    wr_id, wc->status);
+
+       if (unlikely(wr_id >= xve_sendq_size)) {
+               xve_warn(priv, "cm send completion event with wrid %d (> %d)\n",
+                        wr_id, xve_sendq_size);
+               return;
+       }
+
+       tx_req = &tx->tx_ring[wr_id];
+       xve_cm_tx_buf_free(priv, tx_req);
+       ++tx->tx_tail;
+
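+       /* Wake the stopped queue once outstanding sends drop to half the ring */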
+       netif_tx_lock(dev);
+       if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+           netif_queue_stopped(dev) &&
+           test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+               priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+               netif_wake_queue(dev);
+       }
+
+       if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) {
+               pr_err("%s: failed cm send event (status=%d, wrid=%d vend_err %x)\n",
+                      priv->xve_name, wc->status, wr_id, wc->vendor_err);
+               xve_cm_destroy_tx_deferred(tx);
+       }
+       netif_tx_unlock(dev);
+}
+
+int xve_cm_dev_open(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret;
+       u64 sid;
+
+       if (!priv->cm_supported)
+               return 0;
+
+       priv->cm.id = ib_create_cm_id(priv->ca, xve_cm_rx_handler, dev);
+       if (IS_ERR(priv->cm.id)) {
+               pr_warn("%s: failed to create CM ID\n", priv->ca->name);
+               ret = PTR_ERR(priv->cm.id);
+               goto err_cm;
+       }
+
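+       /*
+        * Service ID: IETF CM prefix | low 16 bits of our GID | net_id,
+        * mirroring the ID the active side targets in xve_cm_send_req().
+        */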
+       sid = priv->local_gid.raw[14] << 8 | priv->local_gid.raw[15];
+       sid = XVE_CM_IETF_ID | sid << 32 | priv->net_id;
+
+       ret = ib_cm_listen(priv->cm.id, cpu_to_be64(sid), 0, NULL);
+       if (ret) {
+               pr_warn("%s: failed to listen on ID 0x%llx\n",
+                       priv->ca->name, sid);
+               goto err_listen;
+       }
+
+       return 0;
+
+err_listen:
+       ib_destroy_cm_id(priv->cm.id);
+err_cm:
+       priv->cm.id = NULL;
+       return ret;
+}
+
+void xve_cm_dev_stop(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_ctx *p;
+       unsigned long begin;
+       int ret;
+
+       if (!priv->cm_supported || !priv->cm.id)
+               return;
+
+       ib_destroy_cm_id(priv->cm.id);
+       priv->cm.id = NULL;
+
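+       /*
+        * With the listener gone, move every passive connection to the
+        * error list and kick its QP into the error state.
+        */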
+       spin_lock_irq(&priv->lock);
+       while (!list_empty(&priv->cm.passive_ids)) {
+               p = list_entry(priv->cm.passive_ids.next, typeof(*p), list);
+               list_move(&p->list, &priv->cm.rx_error_list);
+               p->state = XVE_CM_RX_ERROR;
+               spin_unlock_irq(&priv->lock);
+               ret = ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE);
+               if (ret)
+                       xve_warn(priv, "unable to move qp to error state: %d\n",
+                                ret);
+               spin_lock_irq(&priv->lock);
+       }
+
+       /* Wait for all RX to be drained */
+       begin = jiffies;
+
+       while (!list_empty(&priv->cm.rx_error_list) ||
+              !list_empty(&priv->cm.rx_flush_list) ||
+              !list_empty(&priv->cm.rx_drain_list)) {
+               if (time_after(jiffies, begin + 5 * HZ)) {
+                       xve_warn(priv, "RX drain timing out\n");
+
+                       /*
+                        * assume the HW is wedged and just free up everything.
+                        */
+                       list_splice_init(&priv->cm.rx_flush_list,
+                                        &priv->cm.rx_reap_list);
+                       list_splice_init(&priv->cm.rx_error_list,
+                                        &priv->cm.rx_reap_list);
+                       list_splice_init(&priv->cm.rx_drain_list,
+                                        &priv->cm.rx_reap_list);
+                       break;
+               }
+               spin_unlock_irq(&priv->lock);
+               msleep(20);
+               xve_drain_cq(dev);
+               spin_lock_irq(&priv->lock);
+       }
+
+       spin_unlock_irq(&priv->lock);
+
+       cancel_delayed_work_sync(&priv->stale_task);
+       xve_cm_free_rx_reap_list(dev);
+       __xve_cm_tx_reap(priv);
+}
+
+static int xve_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+       struct xve_cm_ctx *p = cm_id->context;
+       struct xve_dev_priv *priv = netdev_priv(p->netdev);
+       struct xve_cm_data *data = event->private_data;
+       struct sk_buff_head skqueue;
+       struct ib_qp_attr qp_attr;
+       int qp_attr_mask, ret;
+       struct sk_buff *skb;
+
+       p->mtu = be32_to_cpu(data->mtu);
+
+       if (p->mtu <= ETH_HLEN) {
+               xve_warn(priv, "Rejecting connection: mtu %d <= %d\n",
+                        p->mtu, ETH_HLEN);
+               return -EINVAL;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTR;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
+               return ret;
+       }
+
+       qp_attr.rq_psn = 0; /* FIXME */
+       ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTR: %d\n", ret);
+               return ret;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTS;
+       ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
+               return ret;
+       }
+       ret = ib_modify_qp(p->qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTS: %d\n", ret);
+               return ret;
+       }
+
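+       /*
+        * Connection is up: drain packets that were queued on the path
+        * while the RC connection was forming and transmit them now.
+        */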
+       skb_queue_head_init(&skqueue);
+
+       spin_lock_irq(&priv->lock);
+       set_bit(XVE_FLAG_OPER_UP, &p->flags);
+       while ((skb = __skb_dequeue(&p->path->queue)))
+               __skb_queue_tail(&skqueue, skb);
+       spin_unlock_irq(&priv->lock);
+
+       while ((skb = __skb_dequeue(&skqueue))) {
+               skb->dev = p->netdev;
+               if (dev_queue_xmit(skb))
+                       xve_warn(priv,
+                                "dev_queue_xmit failed to requeue packet\n");
+               else
+                       xve_dbg_data(priv, "%s Successfully sent skb\n",
+                                    __func__);
+       }
+
+       ret = ib_send_cm_rtu(cm_id, NULL, 0);
+       if (ret) {
+               xve_warn(priv, "failed to send RTU: %d\n", ret);
+               return ret;
+       }
+       return 0;
+}
+
+static struct ib_qp *xve_cm_create_tx_qp(struct net_device *dev,
+                                        struct xve_cm_ctx *tx)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
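+       /*
+        * Send completions are steered to the receive CQ so a single
+        * poller drains both directions.
+        */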
+       struct ib_qp_init_attr attr = {
+               .send_cq = priv->recv_cq,
+               .recv_cq = priv->recv_cq,
+               .srq = priv->cm.srq,
+               .cap.max_send_wr = xve_sendq_size,
+               .cap.max_send_sge = 1,
+               .sq_sig_type = IB_SIGNAL_ALL_WR,
+               .qp_type = IB_QPT_RC,
+               .qp_context = tx
+       };
+
+       return ib_create_qp(priv->pd, &attr);
+}
+
+static int xve_cm_send_req(struct net_device *dev,
+                          struct ib_cm_id *id, struct ib_qp *qp,
+                          struct ib_sa_path_rec *pathrec)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_data data = { };
+       struct ib_cm_req_param req = { };
+       u64 sid;
+
+       sid = pathrec->dgid.raw[14] << 8 | pathrec->dgid.raw[15];
+       sid = XVE_CM_IETF_ID | sid << 32 | priv->net_id;
+
+       data.qpn = cpu_to_be32(priv->qp->qp_num);
+       data.mtu = cpu_to_be32(XVE_CM_BUF_SIZE);
+
+       req.primary_path = pathrec;
+       req.alternate_path = NULL;
+       req.service_id = cpu_to_be64(sid);
+       req.qp_num = qp->qp_num;
+       req.qp_type = qp->qp_type;
+       req.private_data = &data;
+       req.private_data_len = sizeof(data);
+       req.flow_control = 0;
+
+       req.starting_psn = 0;   /* FIXME */
+
+       /*
+        * Pick some arbitrary defaults here; we could make these
+        * module parameters if anyone cared about setting them.
+        */
+       req.responder_resources = 4;
+       req.remote_cm_response_timeout = 20;
+       req.local_cm_response_timeout = 20;
+       req.retry_count = retry_count;
+       req.rnr_retry_count = rnr_retry_count;
+       req.max_cm_retries = 15;
+       req.srq = xve_cm_has_srq(dev);
+       return ib_send_cm_req(id, &req);
+}
+
+static int xve_cm_modify_tx_init(struct net_device *dev,
+                                struct ib_cm_id *cm_id, struct ib_qp *qp)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_attr qp_attr;
+       int qp_attr_mask, ret;
+
+       ret = ib_find_pkey(priv->ca, priv->port, priv->pkey,
+                          &qp_attr.pkey_index);
+       if (ret) {
+               xve_warn(priv, "pkey 0x%x not found: %d\n", priv->pkey, ret);
+               return ret;
+       }
+
+       qp_attr.qp_state = IB_QPS_INIT;
+       qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+       qp_attr.port_num = priv->port;
+       qp_attr_mask =
+           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT;
+
+       ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify tx QP to INIT: %d\n", ret);
+               return ret;
+       }
+       return 0;
+}
+
+static int xve_cm_tx_init(struct xve_cm_ctx *p, struct ib_sa_path_rec *pathrec)
+{
+       struct xve_dev_priv *priv = netdev_priv(p->netdev);
+       int ret;
+
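+       /*
+        * Active-side bring-up: allocate the TX ring, create the RC QP,
+        * move it to INIT, then fire the CM REQ toward the path's GID.
+        */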
+       p->tx_ring = vzalloc(xve_sendq_size * sizeof(*p->tx_ring));
+       if (!p->tx_ring) {
+               xve_warn(priv, "failed to allocate tx ring\n");
+               ret = -ENOMEM;
+               goto err_tx;
+       }
+
+       p->qp = xve_cm_create_tx_qp(p->netdev, p);
+       if (IS_ERR(p->qp)) {
+               ret = PTR_ERR(p->qp);
+               xve_warn(priv, "failed to allocate tx qp: %d\n", ret);
+               goto err_qp;
+       }
+
+       p->id = ib_create_cm_id(priv->ca, xve_cm_tx_handler, p);
+       if (IS_ERR(p->id)) {
+               ret = PTR_ERR(p->id);
+               xve_warn(priv, "failed to create tx cm id: %d\n", ret);
+               goto err_id;
+       }
+
+       ret = xve_cm_modify_tx_init(p->netdev, p->id, p->qp);
+       if (ret) {
+               xve_warn(priv, "failed to modify tx qp to rtr: %d\n", ret);
+               goto err_modify;
+       }
+
+       ret = xve_cm_send_req(p->netdev, p->id, p->qp, pathrec);
+       if (ret) {
+               xve_warn(priv, "failed to send cm req: %d\n", ret);
+               goto err_send_cm;
+       }
+
+       xve_debug(DEBUG_CM_INFO, priv,
+                 "%s Request connection 0x%x for gid %pI6 net_id 0x%x\n",
+                 __func__, p->qp->qp_num, pathrec->dgid.raw, priv->net_id);
+
+       return 0;
+
+err_send_cm:
+err_modify:
+       ib_destroy_cm_id(p->id);
+err_id:
+       p->id = NULL;
+       ib_destroy_qp(p->qp);
+err_qp:
+       p->qp = NULL;
+       vfree(p->tx_ring);
+err_tx:
+       return ret;
+}
+
+static void xve_cm_tx_destroy(struct xve_cm_ctx *p)
+{
+       struct xve_dev_priv *priv = netdev_priv(p->netdev);
+       struct xve_cm_buf *tx_req;
+       unsigned long begin;
+       unsigned long flags = 0;
+
+       xve_debug(DEBUG_CM_INFO, priv,
+                 "%s Destroy active conn 0x%x head 0x%x tail 0x%x\n",
+                 __func__, p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
+       if (p->id)
+               ib_destroy_cm_id(p->id);
+
+       if (p->tx_ring) {
+               /* Wait for all sends to complete */
+               if (!netif_carrier_ok(priv->netdev)
+                   && unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+                       while (poll_tx(priv)); /* nothing */
+
+               begin = jiffies;
+               while ((int)p->tx_tail - (int)p->tx_head < 0) {
+                       if (time_after(jiffies, begin + 5 * HZ)) {
+                               xve_warn(priv,
+                                        "timing out; %d sends not completed\n",
+                                        p->tx_head - p->tx_tail);
+                               goto timeout;
+                       }
+
+                       msleep(20);
+               }
+       }
+
+timeout:
+
+       spin_lock_irqsave(&priv->lock, flags);
+       while ((int)p->tx_tail - (int)p->tx_head < 0) {
+               tx_req = &p->tx_ring[p->tx_tail & (xve_sendq_size - 1)];
+               ++p->tx_tail;
+               spin_unlock_irqrestore(&priv->lock, flags);
+
+               xve_cm_tx_buf_free(priv, tx_req);
+               netif_tx_lock_bh(p->netdev);
+               if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+                   netif_queue_stopped(p->netdev) &&
+                   test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+                       priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+                       netif_wake_queue(p->netdev);
+               }
+               netif_tx_unlock_bh(p->netdev);
+
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       pr_info("%s [xve %s] Destroyed active con", __func__, priv->xve_name);
+       pr_info("qp [0x%x] head", p->qp ? p->qp->qp_num : 0);
+       pr_info("0x%x tail 0x%x\n", p->tx_head, p->tx_tail);
+       if (p->qp)
+               ib_destroy_qp(p->qp);
+       if (p->tx_ring)
+               vfree(p->tx_ring);
+       if (p != NULL)
+               kfree(p);
+}
+
+static int xve_cm_tx_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+{
+       struct xve_cm_ctx *tx = cm_id->context;
+       struct xve_dev_priv *priv;
+       struct net_device *dev;
+       int ret;
+
+       if (tx == NULL) {
+               pr_info("XVE: %s qpn %d Event %d\n", __func__,
+                       cm_id->remote_cm_qpn, event->event);
+               return 0;
+       }
+
+       priv = netdev_priv(tx->netdev);
+       dev = priv->netdev;
+       switch (event->event) {
+       case IB_CM_DREQ_RECEIVED:
+               xve_debug(DEBUG_CM_INFO, priv, "%s DREQ received QP %x\n",
+                         __func__, tx->qp ? tx->qp->qp_num : 0);
+
+               ib_send_cm_drep(cm_id, NULL, 0);
+               break;
+       case IB_CM_REP_RECEIVED:
+               xve_debug(DEBUG_CM_INFO, priv, "%s REP received QP %x\n",
+                         __func__, tx->qp ? tx->qp->qp_num : 0);
+               ret = xve_cm_rep_handler(cm_id, event);
+               if (ret)
+                       ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+                                      NULL, 0, NULL, 0);
+               break;
+       case IB_CM_REQ_ERROR:
+       case IB_CM_REJ_RECEIVED:
+       case IB_CM_TIMEWAIT_EXIT:
+               pr_info("%s CM event %d [dev %s] QP %x\n", __func__,
+                       event->event, dev->name, tx->qp ? tx->qp->qp_num : 0);
+               netif_tx_lock_bh(dev);
+               /*
+                * Should we delete all L2 entries XXX
+                */
+               xve_cm_destroy_tx_deferred(tx);
+               netif_tx_unlock_bh(dev);
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+struct xve_cm_ctx *xve_cm_create_tx(struct net_device *dev,
+                                   struct xve_path *path)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_cm_ctx *tx;
+
+       tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
+       if (!tx)
+               return NULL;
+
+       xve_cm_set(path, tx);
+       strncpy(tx->version, XSIGO_LOCAL_VERSION, 60);
+       tx->direction = XVE_CM_ESTD_TX;
+       tx->path = path;
+       tx->netdev = dev;
+       list_add(&tx->list, &priv->cm.start_list);
+       set_bit(XVE_FLAG_INITIALIZED, &tx->flags);
+       xve_queue_work(priv, XVE_WQ_START_CMTXSTART);
+       return tx;
+}
+
+void xve_cm_destroy_tx_deferred(struct xve_cm_ctx *tx)
+{
+       struct xve_dev_priv *priv = netdev_priv(tx->netdev);
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       clear_bit(XVE_FLAG_OPER_UP, &tx->flags);
+       if (test_and_clear_bit(XVE_FLAG_INITIALIZED, &tx->flags)) {
+               list_move(&tx->list, &priv->cm.reap_list);
+               xve_cm_set(tx->path, NULL);
+               xve_queue_work(priv, XVE_WQ_START_CMTXREAP);
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void xve_cm_tx_start(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_CMTXSTART, 0);
+       struct net_device *dev = priv->netdev;
+       struct xve_cm_ctx *p;
+       unsigned long flags;
+       struct ib_sa_path_rec pathrec;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
+       while (!list_empty(&priv->cm.start_list)) {
+               p = list_entry(priv->cm.start_list.next, typeof(*p), list);
+               list_del_init(&p->list);
+               memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
+
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
+
+               xve_cm_tx_init(p, &pathrec);
+
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
+       xve_put_ctx(priv);
+}
+
+static void __xve_cm_tx_reap(struct xve_dev_priv *priv)
+{
+       struct net_device *dev = priv->netdev;
+       struct xve_cm_ctx *p;
+       unsigned long flags;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
+       while (!list_empty(&priv->cm.reap_list)) {
+               p = list_entry(priv->cm.reap_list.next, typeof(*p), list);
+               list_del(&p->list);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
+               /*
+                * Destroy path
+                */
+               if (p->path)
+                       xve_flush_single_path_by_gid(dev,
+                                                    &p->path->pathrec.dgid);
+               xve_cm_tx_destroy(p);
+               netif_tx_lock_bh(dev);
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
+}
+
+void xve_cm_tx_reap(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_CMTXREAP, 0);
+       __xve_cm_tx_reap(priv);
+       xve_put_ctx(priv);
+}
+
+void xve_cm_rx_reap(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_CMRXREAP, 0);
+
+       xve_cm_free_rx_reap_list(priv->netdev);
+       xve_put_ctx(priv);
+}
+
+void xve_cm_stale_task(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_CMSTALE, 2);
+       struct xve_cm_ctx *p;
+       int ret;
+
+       spin_lock_irq(&priv->lock);
+       while (!list_empty(&priv->cm.passive_ids)) {
+               /* List is sorted by LRU, start from tail,
+                * stop when we see a recently used entry */
+               p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
+               if (time_before_eq(jiffies, p->jiffies + XVE_CM_RX_TIMEOUT))
+                       break;
+               list_move(&p->list, &priv->cm.rx_error_list);
+               p->state = XVE_CM_RX_ERROR;
+               spin_unlock_irq(&priv->lock);
+               ret = ib_modify_qp(p->qp, &xve_cm_err_attr, IB_QP_STATE);
+               if (ret)
+                       xve_warn(priv, "unable to move qp to error state: %d\n",
+                                ret);
+               spin_lock_irq(&priv->lock);
+       }
+
+       if (!list_empty(&priv->cm.passive_ids))
+               xve_queue_complete_work(priv, XVE_WQ_START_CMSTALE,
+                                       XVE_CM_RX_DELAY);
+
+       spin_unlock_irq(&priv->lock);
+}
+
+static void xve_cm_create_srq(struct net_device *dev, int max_sge)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_srq_init_attr srq_init_attr = {
+               .attr = {
+                        .max_wr = xve_recvq_size,
+                        .max_sge = max_sge}
+       };
+
+       priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
+       if (IS_ERR(priv->cm.srq)) {
+               if (PTR_ERR(priv->cm.srq) != -ENOSYS)
+                       pr_warn("%s: failed to allocate SRQ, error %ld\n",
+                               priv->ca->name, PTR_ERR(priv->cm.srq));
+               priv->cm.srq = NULL;
+               return;
+       }
+
+       priv->cm.srq_ring =
+           vzalloc(xve_recvq_size * sizeof(*priv->cm.srq_ring));
+       if (!priv->cm.srq_ring) {
+               pr_warn("%s: failed to allocate CM SRQ ring (%d entries)\n",
+                       priv->ca->name, xve_recvq_size);
+               ib_destroy_srq(priv->cm.srq);
+               priv->cm.srq = NULL;
+               return;
+       }
+}
+
+int xve_cm_dev_init(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int i, ret;
+       struct ib_device_attr attr;
+
+       INIT_LIST_HEAD(&priv->cm.passive_ids);
+       INIT_LIST_HEAD(&priv->cm.reap_list);
+       INIT_LIST_HEAD(&priv->cm.start_list);
+       INIT_LIST_HEAD(&priv->cm.rx_error_list);
+       INIT_LIST_HEAD(&priv->cm.rx_flush_list);
+       INIT_LIST_HEAD(&priv->cm.rx_drain_list);
+       INIT_LIST_HEAD(&priv->cm.rx_reap_list);
+
+       ret = ib_query_device(priv->ca, &attr);
+       if (ret) {
+               pr_warn("ib_query_device() failed with %d\n", ret);
+               return ret;
+       }
+
+       /* Based on the admin mtu from the chassis */
+       attr.max_srq_sge =
+           min_t(int,
+                 ALIGN((priv->admin_mtu + VLAN_ETH_HLEN),
+                       PAGE_SIZE) / PAGE_SIZE, attr.max_srq_sge);
+       xve_debug(DEBUG_CM_INFO, priv, "%s max_srq_sge=%d\n", __func__,
+                 attr.max_srq_sge);
+
+       xve_cm_create_srq(dev, attr.max_srq_sge);
+       if (xve_cm_has_srq(dev)) {
+               priv->cm.max_cm_mtu = attr.max_srq_sge * PAGE_SIZE - 0x20;
+               priv->cm.num_frags = attr.max_srq_sge;
+               xve_debug(DEBUG_CM_INFO, priv,
+                         "%s max_cm_mtu = 0x%x, num_frags=%d\n", __func__,
+                         priv->cm.max_cm_mtu, priv->cm.num_frags);
+       } else {
+               pr_notice("XVE: Non-SRQ mode not supported\n");
+               return -ENOTSUPP;
+       }
+
+       xve_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
+
+       if (xve_cm_has_srq(dev)) {
+               for (i = 0; i < xve_recvq_size; ++i) {
+                       if (!xve_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
+                                                priv->cm.num_frags - 1,
+                                                priv->cm.srq_ring[i].mapping)) {
+                               xve_warn(priv,
+                                        "%s failed to allocate rc receive buffer %d\n",
+                                        __func__, i);
+                               xve_cm_dev_cleanup(dev);
+                               return -ENOMEM;
+                       }
+
+                       if (xve_cm_post_receive_srq(dev, i)) {
+                               xve_warn(priv,
+                                        "xve_cm_post_receive_srq failed for buf %d\n",
+                                        i);
+                               xve_cm_dev_cleanup(dev);
+                               return -EIO;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+void xve_cm_dev_cleanup(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret;
+
+       if (!priv->cm.srq)
+               return;
+
+       xve_debug(DEBUG_CM_INFO, priv, "%s Cleanup xve CM\n", __func__);
+
+       ret = ib_destroy_srq(priv->cm.srq);
+       if (ret)
+               xve_warn(priv, "ib_destroy_srq failed: %d\n", ret);
+
+       priv->cm.srq = NULL;
+       if (!priv->cm.srq_ring)
+               return;
+
+       xve_cm_free_rx_ring(dev, priv->cm.srq_ring);
+       priv->cm.srq_ring = NULL;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_compat.h b/drivers/infiniband/ulp/xsigo/xve/xve_compat.h
new file mode 100644 (file)
index 0000000..cd24547
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef _XVE_COMPAT_H
+#define _XVE_COMPAT_H
+#include "xve.h"
+#define        XVE_OP_CM     (1ul << 30)
+
+#include <net/icmp.h>
+static inline void skb_pkt_type(struct sk_buff *skb, unsigned char type)
+{
+       skb->pkt_type = type;
+}
+
+static inline void xve_dev_set_mtu(struct net_device *dev, int mtu)
+{
+       rtnl_lock();
+       dev_set_mtu(dev, mtu);
+       rtnl_unlock();
+}
+
+static inline void xg_skb_push(struct sk_buff *skb, unsigned int len)
+{
+       skb_push(skb, len);
+}
+
+static inline unsigned xve_random32(struct xve_dev_priv *priv)
+{
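+       /* 24-bit random value, used as a starting PSN; priv is unused */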
+       return (prandom_u32() & 0xffffff);
+}
+
+static inline struct proc_dir_entry *xg_create_proc_entry(const char *name,
+                                                         mode_t mode,
+                                                         struct proc_dir_entry
+                                                         *parent, char root)
+{
+       struct proc_dir_entry *proc_dir = NULL;
+
+       if (mode == S_IFDIR)
+               proc_dir = proc_mkdir(name, parent);
+
+       return proc_dir;
+}
+
+static inline void xg_remove_proc_entry(const char *name,
+                                       struct proc_dir_entry *parent)
+{
+       remove_proc_entry(name, parent);
+}
+
+#endif /* _XVE_COMPAT_H */
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_ethtool.c b/drivers/infiniband/ulp/xsigo/xve/xve_ethtool.c
new file mode 100644 (file)
index 0000000..236d27a
--- /dev/null
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+
+static void xve_get_drvinfo(struct net_device *netdev,
+                           struct ethtool_drvinfo *drvinfo)
+{
+       strlcpy(drvinfo->driver, "xve", sizeof(drvinfo->driver));
+       strlcpy(drvinfo->version, XVE_DRIVER_VERSION, sizeof(drvinfo->version));
+       strlcpy(drvinfo->fw_version, "N/A", sizeof(drvinfo->fw_version));
+       strlcpy(drvinfo->bus_info, "N/A", sizeof(drvinfo->bus_info));
+}
+
+static int xve_get_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *coal)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       coal->rx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+       coal->tx_coalesce_usecs = priv->ethtool.coalesce_usecs;
+       coal->rx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+       coal->tx_max_coalesced_frames = priv->ethtool.max_coalesced_frames;
+
+       return 0;
+}
+
+static int xve_set_coalesce(struct net_device *dev,
+                           struct ethtool_coalesce *coal)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret;
+
+       /*
+        * Since Xve uses a single CQ for both rx and tx, we assume
+        * that rx params dictate the configuration.  These values are
+        * saved in the private data and returned when xve_get_coalesce()
+        * is called.
+        */
+       if (coal->rx_coalesce_usecs > 0xffff ||
+           coal->rx_max_coalesced_frames > 0xffff)
+               return -EINVAL;
+
+       if (coal->rx_max_coalesced_frames || coal->rx_coalesce_usecs) {
+               if (!coal->rx_max_coalesced_frames)
+                       coal->rx_max_coalesced_frames = 0xffff;
+               else if (!coal->rx_coalesce_usecs)
+                       coal->rx_coalesce_usecs = 0xffff;
+       }
+
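+       /*
+        * Push the moderation values to the shared CQ; -ENOSYS only means
+        * the HCA cannot moderate, so the values are still saved below.
+        */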
+       ret = ib_modify_cq(priv->recv_cq, coal->rx_max_coalesced_frames,
+                          coal->rx_coalesce_usecs);
+
+       if (ret && ret != -ENOSYS) {
+               xve_warn(priv, "failed modifying CQ (%d)\n", ret);
+               return ret;
+       }
+
+       coal->tx_coalesce_usecs = coal->rx_coalesce_usecs;
+       coal->tx_max_coalesced_frames = coal->rx_max_coalesced_frames;
+       priv->ethtool.coalesce_usecs = coal->rx_coalesce_usecs;
+       priv->ethtool.max_coalesced_frames = coal->rx_max_coalesced_frames;
+
+       return 0;
+}
+
+static const char xve_stats_keys[][ETH_GSTRING_LEN] = {
+       "rx_packets", "rx_bytes", "rx_errors", "rx_drops",
+       "tx_packets", "tx_bytes", "tx_errors", "tx_drops",
+       "LRO aggregated", "LRO flushed",
+       "LRO avg aggr", "LRO no desc"
+};
+
+static void xve_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+{
+       switch (stringset) {
+       case ETH_SS_STATS:
+               memcpy(data, *xve_stats_keys, sizeof(xve_stats_keys));
+               break;
+       }
+}
+
+static int xve_get_sset_count(struct net_device *dev, int sset)
+{
+       switch (sset) {
+       case ETH_SS_STATS:
+               return ARRAY_SIZE(xve_stats_keys);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void xve_get_ethtool_stats(struct net_device *dev,
+                                 struct ethtool_stats *stats, uint64_t *data)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int index = 0;
+
+       /* Generic netdev counters first */
+       data[index++] = dev->stats.rx_packets;
+       data[index++] = dev->stats.rx_bytes;
+       data[index++] = dev->stats.rx_errors;
+       data[index++] = dev->stats.rx_dropped;
+
+       data[index++] = dev->stats.tx_packets;
+       data[index++] = dev->stats.tx_bytes;
+       data[index++] = dev->stats.tx_errors;
+       data[index++] = dev->stats.tx_dropped;
+
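+       /* LRO statistics */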
+       data[index++] = priv->lro.lro_mgr.stats.aggregated;
+       data[index++] = priv->lro.lro_mgr.stats.flushed;
+       if (priv->lro.lro_mgr.stats.flushed)
+               data[index++] = priv->lro.lro_mgr.stats.aggregated /
+                   priv->lro.lro_mgr.stats.flushed;
+       else
+               data[index++] = 0;
+       data[index++] = priv->lro.lro_mgr.stats.no_desc;
+}
+
+static int xve_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+{
+       struct xve_dev_priv *xvep = netdev_priv(netdev);
+
+       ecmd->autoneg = 0;
+       ecmd->speed = SPEED_10000;
+       ecmd->duplex = DUPLEX_FULL;     /* Duplex is hard coded */
+       if (netif_carrier_ok(netdev)) {
+               ecmd->speed = xvep->port_speed;
+               ecmd->advertising = ADVERTISED_10000baseT_Full;
+               ecmd->supported = SUPPORTED_10000baseT_Full |
+                   SUPPORTED_FIBRE | SUPPORTED_Autoneg;
+               ecmd->port = PORT_FIBRE;
+               ecmd->transceiver = XCVR_EXTERNAL;
+       }
+       return 0;
+}
+
+static const struct ethtool_ops xve_ethtool_ops = {
+       .get_settings = xve_get_settings,
+       .get_drvinfo = xve_get_drvinfo,
+       .get_coalesce = xve_get_coalesce,
+       .set_coalesce = xve_set_coalesce,
+       .get_strings = xve_get_strings,
+       .get_sset_count = xve_get_sset_count,
+       .get_ethtool_stats = xve_get_ethtool_stats,
+       .get_link = ethtool_op_get_link,
+};
+
+void xve_set_ethtool_ops(struct net_device *dev)
+{
+       dev->ethtool_ops = &xve_ethtool_ops;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_fs.c b/drivers/infiniband/ulp/xsigo/xve/xve_fs.c
new file mode 100644 (file)
index 0000000..23d7806
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#if defined(CONFIG_INFINIBAND_XVE_DEBUG)
+static struct dentry *xve_root;
+
+static void format_gid(union ib_gid *gid, char *buf)
+{
+       int i, n;
+
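+       /* Render the GID as eight colon-separated 16-bit hex groups */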
+       for (n = 0, i = 0; i < 8; ++i) {
+               n += sprintf(buf + n, "%x",
+                            be16_to_cpu(((__be16 *) gid->raw)[i]));
+               if (i < 7)
+                       buf[n++] = ':';
+       }
+}
+
+static void *xve_mcg_seq_start(struct seq_file *file, loff_t *pos)
+{
+       struct xve_mcast_iter *iter;
+       loff_t n = *pos;
+
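+       /* seq_file may (re)start anywhere: walk a fresh iterator *pos entries forward */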
+       iter = xve_mcast_iter_init(file->private);
+       if (!iter)
+               return NULL;
+
+       while (n--) {
+               if (xve_mcast_iter_next(iter)) {
+                       kfree(iter);
+                       return NULL;
+               }
+       }
+
+       return iter;
+}
+
+static void *xve_mcg_seq_next(struct seq_file *file, void *iter_ptr,
+                             loff_t *pos)
+{
+       struct xve_mcast_iter *iter = iter_ptr;
+
+       (*pos)++;
+
+       if (xve_mcast_iter_next(iter)) {
+               kfree(iter);
+               return NULL;
+       }
+
+       return iter;
+}
+
+static void xve_mcg_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+       /* nothing for now */
+}
+
+static int xve_mcg_seq_show(struct seq_file *file, void *iter_ptr)
+{
+       struct xve_mcast_iter *iter = iter_ptr;
+       char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+       union ib_gid mgid;
+       unsigned long created;
+       unsigned int queuelen, complete, send_only;
+
+       if (!iter)
+               return 0;
+
+       xve_mcast_iter_read(iter, &mgid, &created, &queuelen,
+                           &complete, &send_only);
+
+       format_gid(&mgid, gid_buf);
+
+       seq_printf(file,
+                  "GID: %s\n"
+                  "  created: %10ld\n"
+                  "  queuelen: %9d\n"
+                  "  complete: %9s\n"
+                  "  send_only: %8s\n"
+                  "\n",
+                  gid_buf, created, queuelen,
+                  complete ? "yes" : "no", send_only ? "yes" : "no");
+
+       return 0;
+}
+
+static const struct seq_operations xve_mcg_seq_ops = {
+       .start = xve_mcg_seq_start,
+       .next = xve_mcg_seq_next,
+       .stop = xve_mcg_seq_stop,
+       .show = xve_mcg_seq_show,
+};
+
+static int xve_mcg_open(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &xve_mcg_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private;
+
+       return 0;
+}
+
+static const struct file_operations xve_mcg_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_mcg_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release
+};
+
+static void *xve_path_seq_start(struct seq_file *file, loff_t *pos)
+{
+       struct xve_path_iter *iter;
+       loff_t n = *pos;
+
+       iter = xve_path_iter_init(file->private);
+       if (!iter)
+               return NULL;
+
+       while (n--) {
+               if (xve_path_iter_next(iter)) {
+                       kfree(iter);
+                       return NULL;
+               }
+       }
+
+       return iter;
+}
+
+static void *xve_path_seq_next(struct seq_file *file, void *iter_ptr,
+                              loff_t *pos)
+{
+       struct xve_path_iter *iter = iter_ptr;
+
+       (*pos)++;
+
+       if (xve_path_iter_next(iter)) {
+               kfree(iter);
+               return NULL;
+       }
+
+       return iter;
+}
+
+static void xve_path_seq_stop(struct seq_file *file, void *iter_ptr)
+{
+       /* nothing for now */
+}
+
+static int xve_path_seq_show(struct seq_file *file, void *iter_ptr)
+{
+       struct xve_path_iter *iter = iter_ptr;
+       char gid_buf[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff"];
+       struct xve_path path;
+       int rate;
+
+       if (!iter)
+               return 0;
+
+       xve_path_iter_read(iter, &path);
+
+       format_gid(&path.pathrec.dgid, gid_buf);
+
+       seq_printf(file,
+                  "GID: %s\n"
+                  "  complete: %6s\n",
+                  gid_buf, path.pathrec.dlid ? "yes" : "no");
+
+       if (path.pathrec.dlid) {
+               rate = ib_rate_to_mult(path.pathrec.rate) * 25;
+
+               seq_printf(file,
+                          "  DLID:     0x%04x\n"
+                          "  SL: %12d\n"
+                          "  rate: %*d%s Gb/sec\n",
+                          be16_to_cpu(path.pathrec.dlid),
+                          path.pathrec.sl,
+                          10 - ((rate % 10) ? 2 : 0),
+                          rate / 10, rate % 10 ? ".5" : "");
+       }
+
+       seq_putc(file, '\n');
+
+       return 0;
+}
+
+static const struct seq_operations xve_path_seq_ops = {
+       .start = xve_path_seq_start,
+       .next = xve_path_seq_next,
+       .stop = xve_path_seq_stop,
+       .show = xve_path_seq_show,
+};
+
+static int xve_path_open(struct inode *inode, struct file *file)
+{
+       struct seq_file *seq;
+       int ret;
+
+       ret = seq_open(file, &xve_path_seq_ops);
+       if (ret)
+               return ret;
+
+       seq = file->private_data;
+       seq->private = inode->i_private;
+
+       return 0;
+}
+
+static const struct file_operations xve_path_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_path_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = seq_release
+};
+
+void xve_create_debug_files(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       char name[IFNAMSIZ + sizeof "_path"];
+
+       snprintf(name, sizeof(name), "%s_mcg", dev->name);
+       priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+                                              xve_root, dev, &xve_mcg_fops);
+       if (!priv->mcg_dentry)
+               xve_warn(priv, "failed to create mcg debug file\n");
+
+       snprintf(name, sizeof(name), "%s_path", dev->name);
+       priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
+                                               xve_root, dev, &xve_path_fops);
+       if (!priv->path_dentry)
+               xve_warn(priv, "failed to create path debug file\n");
+}
+
+void xve_delete_debug_files(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       debugfs_remove(priv->mcg_dentry);
+       debugfs_remove(priv->path_dentry);
+}
+
+int xve_register_debugfs(void)
+{
+       xve_root = debugfs_create_dir("xve", NULL);
+       return xve_root ? 0 : -ENOMEM;
+}
+
+void xve_unregister_debugfs(void)
+{
+       debugfs_remove(xve_root);
+}
+#endif
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_ib.c b/drivers/infiniband/ulp/xsigo/xve/xve_ib.c
new file mode 100644 (file)
index 0000000..85ebdb0
--- /dev/null
@@ -0,0 +1,1132 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static DEFINE_MUTEX(pkey_mutex);
+
+struct xve_ah *xve_create_ah(struct net_device *dev,
+                            struct ib_pd *pd, struct ib_ah_attr *attr)
+{
+       struct xve_ah *ah;
+
+       ah = kmalloc(sizeof(*ah), GFP_KERNEL);
+       if (!ah)
+               return NULL;
+
+       ah->dev = dev;
+       ah->last_send = 0;
+       kref_init(&ah->ref);
+
+       ah->ah = ib_create_ah(pd, attr);
+       if (IS_ERR(ah->ah)) {
+               kfree(ah);
+               ah = NULL;
+       } else
+               xve_debug(DEBUG_MCAST_INFO, netdev_priv(dev),
+                         "%s Created ah %p\n", __func__, ah->ah);
+
+       return ah;
+}
+
+void xve_free_ah(struct kref *kref)
+{
+       struct xve_ah *ah = container_of(kref, struct xve_ah, ref);
+       struct xve_dev_priv *priv = netdev_priv(ah->dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       list_add_tail(&ah->list, &priv->dead_ahs);
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void xve_ud_dma_unmap_rx(struct xve_dev_priv *priv,
+                               u64 mapping[XVE_UD_RX_SG])
+{
+       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+               ib_dma_unmap_single(priv->ca, mapping[0], XVE_UD_HEAD_SIZE,
+                                   DMA_FROM_DEVICE);
+               ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
+                                 DMA_FROM_DEVICE);
+               xve_counters[XVE_NUM_PAGES_ALLOCED]--;
+       } else
+               ib_dma_unmap_single(priv->ca, mapping[0],
+                                   XVE_UD_BUF_SIZE(priv->max_ib_mtu),
+                                   DMA_FROM_DEVICE);
+}
+
+static void xve_ud_skb_put_frags(struct xve_dev_priv *priv,
+                                struct sk_buff *skb, unsigned int length)
+{
+       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+               unsigned int size;
+               /*
+                * Only two buffers are needed for max_payload = 4K:
+                * the first buffer is XVE_UD_HEAD_SIZE bytes, the rest
+                * of the datagram lands in the page fragment.
+                */
+               skb->tail += XVE_UD_HEAD_SIZE;
+               skb->len += length;
+
+               size = length - XVE_UD_HEAD_SIZE;
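+               /* only 'size' bytes of the PAGE_SIZE fragment were filled */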
+
+               frag->size = size;
+               skb->data_len += size;
+               skb->truesize += size;
+       } else {
+               skb_put(skb, length);
+       }
+}
+
+static int xve_ib_post_receive(struct net_device *dev, int id)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_recv_wr *bad_wr;
+       int ret;
+
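+       /*
+        * Encode the ring index in wr_id and tag it with XVE_OP_RECV so
+        * poll_rx() can tell receive completions from CM/TX ones.
+        */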
+       priv->rx_wr.wr_id = id | XVE_OP_RECV;
+       priv->rx_sge[0].addr = priv->rx_ring[id].mapping[0];
+       priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
+
+       ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
+       if (unlikely(ret)) {
+               xve_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
+               xve_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
+               dev_kfree_skb_any(priv->rx_ring[id].skb);
+               priv->rx_ring[id].skb = NULL;
+       }
+
+       return ret;
+}
+
+static struct sk_buff *xve_alloc_rx_skb(struct net_device *dev, int id)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct sk_buff *skb;
+       int buf_size;
+       u64 *mapping;
+
+       if (xve_ud_need_sg(priv->max_ib_mtu))
+               buf_size = XVE_UD_HEAD_SIZE;
+       else
+               buf_size = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
+
+       skb = xve_dev_alloc_skb(priv, buf_size + 10);
+       if (unlikely(!skb))
+               return NULL;
+
+       /*
+        * The Ethernet header is 14 bytes and IB leaves a 40-byte gap
+        * for the GRH, so reserving 10 more bytes brings us to 64 and
+        * aligns the IP header on a 16-byte boundary.
+        */
+       skb_reserve(skb, 10);
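+       /* 10 (reserve) + 40 (GRH) + 14 (Ethernet) = 64, a multiple of 16 */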
+
+       mapping = priv->rx_ring[id].mapping;
+       mapping[0] = ib_dma_map_single(priv->ca, skb->data, buf_size,
+                                      DMA_FROM_DEVICE);
+       if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
+               goto error;
+
+       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+               struct page *page = xve_alloc_page(GFP_ATOMIC);
+
+               if (!page)
+                       goto partial_error;
+               skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
+               mapping[1] =
+                   ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page.p,
+                                   0, PAGE_SIZE, DMA_FROM_DEVICE);
+               if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
+                       goto partial_error;
+       }
+
+       priv->rx_ring[id].skb = skb;
+       return skb;
+
+partial_error:
+       ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
+error:
+       dev_kfree_skb_any(skb);
+       return NULL;
+}
+
+static int xve_ib_post_receives(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int i;
+
+       for (i = 0; i < xve_recvq_size; ++i) {
+               if (!xve_alloc_rx_skb(dev, i)) {
+                       xve_warn(priv,
+                                "%s failed to allocate ib receive buffer %d\n",
+                                __func__, i);
+                       return -ENOMEM;
+               }
+               if (xve_ib_post_receive(dev, i)) {
+                       xve_warn(priv,
+                                "%s xve_ib_post_receive failed for buf %d\n",
+                                __func__, i);
+                       return -EIO;
+               }
+       }
+
+       return 0;
+}
+
+static void xve_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       unsigned int wr_id = wc->wr_id & ~XVE_OP_RECV;
+       struct sk_buff *skb;
+       u64 mapping[XVE_UD_RX_SG];
+       struct ib_packed_grh *grhhdr;
+       char *smac;
+       u16 vlan;
+
+       xve_dbg_data(priv, "recv completion: id %d, status: %d\n",
+                    wr_id, wc->status);
+
+       if (unlikely(wr_id >= xve_recvq_size)) {
+               xve_warn(priv, "recv completion event with wrid %d (> %d)\n",
+                        wr_id, xve_recvq_size);
+               return;
+       }
+
+       skb = priv->rx_ring[wr_id].skb;
+
+       if (unlikely(wc->status != IB_WC_SUCCESS)) {
+               if (wc->status != IB_WC_WR_FLUSH_ERR) {
+                       xve_warn(priv,
+                                "failed recv event (status=%d, wrid=%d vend_err %x)\n",
+                                wc->status, wr_id, wc->vendor_err);
+               }
+               xve_ud_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
+               dev_kfree_skb_any(skb);
+               priv->rx_ring[wr_id].skb = NULL;
+               return;
+       }
+
+       /*
+        * Drop packets that this interface itself sent, i.e. multicast
+        * packets that the HCA has replicated back to us.
+        */
+       if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+               goto repost;
+
+       memcpy(mapping, priv->rx_ring[wr_id].mapping,
+              XVE_UD_RX_SG * sizeof(*mapping));
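+       /*
+        * Save the DMA addresses now: xve_alloc_rx_skb() below overwrites
+        * rx_ring[wr_id].mapping when it maps the replacement buffer.
+        */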
+
+       /*
+        * If we can't allocate a new RX buffer, dump
+        * this packet and reuse the old buffer.
+        */
+       if (unlikely(!xve_alloc_rx_skb(dev, wr_id))) {
+               INC_RX_DROP_STATS(priv, dev);
+               goto repost;
+       }
+
+       xve_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+                    wc->byte_len, wc->slid);
+
+       xve_ud_dma_unmap_rx(priv, mapping);
+       xve_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+       grhhdr = (struct ib_packed_grh *)(skb->data);
+       smac = skb->data + IB_GRH_BYTES + ETH_ALEN;
+       skb_pull(skb, IB_GRH_BYTES);
+       vlan = xg_vlan_get_rxtag(skb);
+       xve_fwt_insert(priv, NULL, &grhhdr->source_gid, wc->src_qp, smac, vlan);
+       xve_prepare_skb(priv, skb);
+
+       xve_test("%s RX UD pkt %02x %02x %02x %02x %02x %02x %02x %02x %02x",
+                __func__, skb->data[0], skb->data[1], skb->data[2],
+                skb->data[3], skb->data[4], skb->data[5], skb->data[6],
+                skb->data[7], skb->data[8]);
+       xve_test("%02x %02x %02x proto %x for %s\n",
+                skb->data[9], skb->data[10], skb->data[11],
+                skb->protocol, priv->xve_name);
+       xve_send_skb(priv, skb);
+repost:
+       if (unlikely(xve_ib_post_receive(dev, wr_id))) {
+               xve_warn(priv, "xve_ib_post_receive failed for buf %d\n",
+                        wr_id);
+       }
+}
+
+static int xve_dma_map_tx(struct ib_device *ca, struct xve_tx_buf *tx_req)
+{
+       struct sk_buff *skb = tx_req->skb;
+       u64 *mapping = tx_req->mapping;
+       int i;
+       int off;
+
+       if (skb_headlen(skb)) {
+               mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+                                              DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+                       return -EIO;
+
+               off = 1;
+       } else
+               off = 0;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               mapping[i + off] = ib_dma_map_page(ca, skb_frag_page(frag),
+                                                  frag->page_offset,
+                                                  frag->size, DMA_TO_DEVICE);
+               if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
+                       goto partial_error;
+       }
+       return 0;
+
+partial_error:
+       for (; i > 0; --i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+
+               ib_dma_unmap_page(ca, mapping[i - !off], frag->size,
+                                 DMA_TO_DEVICE);
+       }
+
+       if (off)
+               ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+                                   DMA_TO_DEVICE);
+
+       return -EIO;
+}
+
+static void xve_dma_unmap_tx(struct ib_device *ca, struct xve_tx_buf *tx_req)
+{
+       struct sk_buff *skb = tx_req->skb;
+       u64 *mapping = tx_req->mapping;
+       int i;
+       int off;
+
+       if (skb_headlen(skb)) {
+               ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
+                                   DMA_TO_DEVICE);
+               off = 1;
+       } else
+               off = 0;
+
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               ib_dma_unmap_page(ca, mapping[i + off], frag->size,
+                                 DMA_TO_DEVICE);
+       }
+}
+
+static void xve_free_txbuf_memory(struct xve_dev_priv *priv,
+                                 struct xve_tx_buf *tx_req)
+{
+       if ((tx_req->skb == NULL) || (!tx_req->mapping[0]))
+               xve_debug(DEBUG_DATA_INFO, priv,
+                         "%s [ca %p] tx_req skb %p mapping %lld\n",
+                         __func__, priv->ca, tx_req->skb, tx_req->mapping[0]);
+       else
+               xve_dma_unmap_tx(priv->ca, tx_req);
+
+       xve_dev_kfree_skb_any(priv, tx_req->skb, 1);
+       memset(tx_req, 0, sizeof(struct xve_tx_buf));
+}
+
+static void xve_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       unsigned int wr_id = wc->wr_id;
+       struct xve_tx_buf *tx_req;
+
+       xve_dbg_data(priv, "send completion: id %d, status: %d\n",
+                    wr_id, wc->status);
+
+       if (unlikely(wr_id >= xve_sendq_size)) {
+               xve_warn(priv, "send completion event with wrid %d (> %d)\n",
+                        wr_id, xve_sendq_size);
+               return;
+       }
+
+       tx_req = &priv->tx_ring[wr_id];
+       xve_free_txbuf_memory(priv, tx_req);
+
+       ++priv->tx_tail;
+
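+       /*
+        * Wake the stopped queue only once the ring drains to half full;
+        * the hysteresis avoids wake/stop ping-pong at the high-water mark.
+        */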
+       if (unlikely(--priv->tx_outstanding == xve_sendq_size >> 1) &&
+           netif_queue_stopped(dev) &&
+           test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+               priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+               netif_wake_queue(dev);
+       }
+
+       if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) {
+               xve_warn(priv,
+                        "failed send event (status=%d, wrid=%d vend_err %x)\n",
+                        wc->status, wr_id, wc->vendor_err);
+       }
+}
+
+int poll_tx(struct xve_dev_priv *priv)
+{
+       int n, i, tot = 0;
+
+       do {
+               n = ib_poll_cq(priv->send_cq, MAX_SEND_CQE, priv->send_wc);
+               /* handle multiple WC's in one call */
+               if (likely(n > 0)) {
+                       for (i = 0; i < n; ++i)
+                               xve_ib_handle_tx_wc(priv->netdev,
+                                                   priv->send_wc + i);
+                       tot += n;
+               } else if (n == 0) {
+                       break;
+               } else {
+                       xve_warn(priv, "%s ib_poll_cq() failed, rc %d\n",
+                                __func__, n);
+               }
+
+       } while (n == MAX_SEND_CQE);
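+       /*
+        * The loop re-polls whenever a full batch of MAX_SEND_CQE
+        * completions came back, since more may still be pending.
+        */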
+
+       return tot;
+}
+
+static int poll_rx(struct xve_dev_priv *priv, int num_polls, int *done,
+                  int flush)
+{
+       int n, i;
+
+       n = ib_poll_cq(priv->recv_cq, num_polls, priv->ibwc);
+       for (i = 0; i < n; ++i) {
+               /*
+                * Convert any successful completions to flush
+                * errors to avoid passing packets up the
+                * stack after bringing the device down.
+                */
+               if (flush && (priv->ibwc[i].status == IB_WC_SUCCESS))
+                       priv->ibwc[i].status = IB_WC_WR_FLUSH_ERR;
+
+               if (priv->ibwc[i].wr_id & XVE_OP_RECV) {
+                       ++(*done);
+                       if (priv->ibwc[i].wr_id & XVE_OP_CM)
+                               xve_cm_handle_rx_wc(priv->netdev,
+                                                   priv->ibwc + i);
+                       else
+                               xve_ib_handle_rx_wc(priv->netdev,
+                                                   priv->ibwc + i);
+               } else
+                       xve_cm_handle_tx_wc(priv->netdev, priv->ibwc + i);
+       }
+       return n;
+}
+
+int xve_poll(struct napi_struct *napi, int budget)
+{
+       struct xve_dev_priv *priv =
+           container_of(napi, struct xve_dev_priv, napi);
+       struct net_device *dev = priv->netdev;
+       int done, n, t;
+       unsigned long flags = 0;
+
+       done = 0;
+
+       priv->counters[XVE_NAPI_POLL_COUNTER]++;
+       /*
+        * If the interface is not operationally up, complete NAPI,
+        * clear the interrupt-enabled flag and return.
+        */
+       if (!test_bit(XVE_OPER_UP, &priv->state)) {
+               napi_complete(&priv->napi);
+               clear_bit(XVE_INTR_ENABLED, &priv->state);
+               return 0;
+       }
+
+poll_more:
+       while (done < budget) {
+               int max = (budget - done);
+
+               t = min(XVE_NUM_WC, max);
+               n = poll_rx(priv, t, &done, 0);
+               if (n != t)
+                       break;
+       }
+
+       if (done < budget) {
+               if (dev->features & NETIF_F_LRO)
+                       lro_flush_all(&priv->lro.lro_mgr);
+
+               napi_complete(napi);
+               clear_bit(XVE_OVER_QUOTA, &priv->state);
+       } else {
+               set_bit(XVE_OVER_QUOTA, &priv->state);
+               priv->counters[XVE_RX_QUOTA_EXCEEDED_COUNTER]++;
+               return done;
+       }
+
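+       /*
+        * Budget not exhausted: re-arm the CQ. A non-zero return from
+        * ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS means
+        * completions arrived meanwhile, so reschedule NAPI instead of
+        * losing them.
+        */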
+       spin_lock_irqsave(&priv->lock, flags);
+       if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+           test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+           test_bit(XVE_OPER_UP, &priv->state) &&
+           !test_bit(XVE_DELETING, &priv->state)) {
+               set_bit(XVE_INTR_ENABLED, &priv->state);
+               if (unlikely
+                   (ib_req_notify_cq
+                    (priv->recv_cq,
+                     IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS))
+                   && napi_reschedule(napi)) {
+                       priv->counters[XVE_NAPI_RESCHEDULE_COUNTER]++;
+                       spin_unlock_irqrestore(&priv->lock, flags);
+                       goto poll_more;
+               }
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       return done;
+}
+
+void xve_ib_completion(struct ib_cq *cq, void *dev_ptr)
+{
+       struct net_device *dev = dev_ptr;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_data_recv_handler(priv);
+}
+
+/*
+ * Data is pending, in interrupt context
+ */
+void xve_data_recv_handler(struct xve_dev_priv *priv)
+{
+       if (test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+           test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) &&
+           test_bit(XVE_OPER_UP, &priv->state) &&
+           !test_bit(XVE_DELETING, &priv->state)) {
+               priv->counters[XVE_NAPI_SCHED_COUNTER]++;
+               clear_bit(XVE_INTR_ENABLED, &priv->state);
+               napi_schedule(&priv->napi);
+       } else
+               priv->counters[XVE_NAPI_NOTSCHED_COUNTER]++;
+}
+
+void xve_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
+{
+       struct xve_dev_priv *priv = netdev_priv((struct net_device *)dev_ptr);
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (test_bit(XVE_OPER_UP, &priv->state) &&
+           !test_bit(XVE_DELETING, &priv->state)) {
+               poll_tx(priv);
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static inline int post_send(struct xve_dev_priv *priv,
+                           unsigned int wr_id,
+                           struct ib_ah *address, u32 qpn,
+                           struct xve_tx_buf *tx_req, void *head, int hlen)
+{
+       struct ib_send_wr *bad_wr;
+       int i, off;
+       struct sk_buff *skb = tx_req->skb;
+       skb_frag_t *frags = skb_shinfo(skb)->frags;
+       int nr_frags = skb_shinfo(skb)->nr_frags;
+       u64 *mapping = tx_req->mapping;
+
+       if (skb_headlen(skb)) {
+               priv->tx_sge[0].addr = mapping[0];
+               priv->tx_sge[0].length = skb_headlen(skb);
+               off = 1;
+       } else
+               off = 0;
+
+       for (i = 0; i < nr_frags; ++i) {
+               priv->tx_sge[i + off].addr = mapping[i + off];
+               priv->tx_sge[i + off].length = frags[i].size;
+       }
+       priv->tx_wr.num_sge = nr_frags + off;
+       priv->tx_wr.wr_id = wr_id;
+       priv->tx_wr.wr.ud.remote_qpn = qpn;
+       priv->tx_wr.wr.ud.ah = address;
+
+       if (head) {
+               priv->tx_wr.wr.ud.mss = skb_shinfo(skb)->gso_size;
+               priv->tx_wr.wr.ud.header = head;
+               priv->tx_wr.wr.ud.hlen = hlen;
+               priv->tx_wr.opcode = IB_WR_LSO;
+       } else
+               priv->tx_wr.opcode = IB_WR_SEND;
+
+       return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
+}
+
+void xve_send(struct net_device *dev, struct sk_buff *skb,
+             struct xve_ah *address, u32 qpn)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_tx_buf *tx_req;
+       int hlen;
+       void *phead;
+
+       if (skb_is_gso(skb)) {
+               hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+               phead = skb->data;
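+               /*
+                * For LSO the headers travel out of band: hlen spans the
+                * Ethernet/IP/TCP headers, handed to the HCA via the LSO
+                * work request in post_send().
+                */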
+               if (unlikely(!skb_pull(skb, hlen))) {
+                       xve_warn(priv,
+                                "%s linear data too small, dropping packet (%ld dropped) on %s\n",
+                                __func__, dev->stats.tx_dropped, dev->name);
+                       INC_TX_DROP_STATS(priv, dev);
+                       INC_TX_ERROR_STATS(priv, dev);
+                       dev_kfree_skb_any(skb);
+                       return;
+               }
+       } else {
+               if (unlikely(skb->len > priv->mcast_mtu + VLAN_ETH_HLEN)) {
+                       xve_warn(priv,
+                                "%s packet len %d (> %d) too long to send, dropping (%ld dropped) on %s\n",
+                                __func__, skb->len, priv->mcast_mtu,
+                                dev->stats.tx_dropped, dev->name);
+                       INC_TX_DROP_STATS(priv, dev);
+                       INC_TX_ERROR_STATS(priv, dev);
+                       dev_kfree_skb_any(skb);
+                       return;
+               }
+               phead = NULL;
+               hlen = 0;
+       }
+
+       xve_dbg_data(priv,
+                    "%s sending packet, length=%d address=%p qpn=0x%06x\n",
+                    __func__, skb->len, address, qpn);
+
+       /*
+        * We put the skb into the tx_ring _before_ we call post_send()
+        * because it's entirely possible that the completion handler will
+        * run before we execute anything after the post_send().  That
+        * means we have to make sure everything is properly recorded and
+        * our state is consistent before we call post_send().
+        */
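+       /*
+        * The index is masked rather than range-checked, so
+        * xve_sendq_size must be a power of two.
+        */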
+       tx_req = &priv->tx_ring[priv->tx_head & (xve_sendq_size - 1)];
+       tx_req->skb = skb;
+       if (unlikely(xve_dma_map_tx(priv->ca, tx_req))) {
+               INC_TX_ERROR_STATS(priv, dev);
+               dev_kfree_skb_any(tx_req->skb);
+               memset(tx_req, 0, sizeof(struct xve_tx_buf));
+               return;
+       }
+
+       if (++priv->tx_outstanding == xve_sendq_size) {
+               xve_dbg_data(priv,
+                            "%s TX ring full, stopping kernel net queue\n",
+                            __func__);
+               if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
+                       xve_warn(priv, "%s request notify on send CQ failed\n",
+                                __func__);
+               priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+               priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+               netif_stop_queue(dev);
+       }
+
+       if (unlikely(post_send(priv, priv->tx_head & (xve_sendq_size - 1),
+                              address->ah, qpn, tx_req, phead, hlen))) {
+               xve_warn(priv, "%s post_send failed\n", __func__);
+               INC_TX_ERROR_STATS(priv, dev);
+               --priv->tx_outstanding;
+               priv->counters[XVE_TX_RING_FULL_COUNTER]++;
+               xve_free_txbuf_memory(priv, tx_req);
+               if (netif_queue_stopped(dev)) {
+                       priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+                       netif_wake_queue(dev);
+               }
+       } else {
+               address->last_send = priv->tx_head;
+               ++priv->tx_head;
+               skb_orphan(skb);
+       }
+       priv->send_hbeat_flag = 0;
+       if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+               poll_tx(priv);
+}
+
+static void __xve_reap_ah(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_ah *ah, *tah;
+       LIST_HEAD(remove_list);
+       unsigned long flags = 0;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
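+       /*
+        * An address handle may be freed only after its last posted send
+        * has completed; the signed subtraction copes with counter
+        * wraparound.
+        */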
+       list_for_each_entry_safe(ah, tah, &priv->dead_ahs, list)
+               if ((int)priv->tx_tail - (int)ah->last_send >= 0) {
+                       list_del(&ah->list);
+                       ib_destroy_ah(ah->ah);
+                       kfree(ah);
+               }
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
+}
+
+void xve_reap_ah(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_AHREAP, 1);
+       struct net_device *dev = priv->netdev;
+
+       __xve_reap_ah(dev);
+
+       /* STOP_REAPER is set in xve_stop */
+       if (!test_bit(XVE_STOP_REAPER, &priv->flags))
+               xve_queue_dwork(priv, XVE_WQ_START_AHREAP,
+                               round_jiffies_relative(HZ));
+       xve_put_ctx(priv);
+}
+
+static void xve_ah_dev_cleanup(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       unsigned long begin;
+
+       begin = jiffies;
+
+       while (!list_empty(&priv->dead_ahs)) {
+               __xve_reap_ah(dev);
+
+               if (time_after(jiffies, begin + HZ)) {
+                       xve_warn(priv,
+                                "timing out; will leak address handles\n");
+                       break;
+               }
+
+               msleep(20);
+       }
+}
+
+static void xve_pkey_dev_check_presence(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       u16 pkey_index = 0;
+
+       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index))
+               clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+       else
+               set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+}
+
+int xve_ib_dev_up(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s Bring up ib_dev\n", __func__);
+       xve_pkey_dev_check_presence(dev);
+       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+               xve_debug(DEBUG_IBDEV_INFO, priv, "%s PKEY is not assigned\n",
+                         __func__);
+               return 0;
+       }
+
+       set_bit(XVE_FLAG_OPER_UP, &priv->flags);
+
+       return xve_mcast_start_thread(dev);
+}
+
+int xve_ib_dev_down(struct net_device *dev, int flush)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s downing ib_dev\n", __func__);
+       if (!test_and_clear_bit(XVE_FLAG_OPER_UP, &priv->flags)) {
+               xve_debug(DEBUG_IBDEV_INFO, priv,
+                         "%s Down IB without being up\n", __func__);
+               return 0;
+       }
+
+       netif_carrier_off(priv->netdev);
+
+       /* Shutdown the P_Key thread if still active */
+       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+               mutex_lock(&pkey_mutex);
+               set_bit(XVE_PKEY_STOP, &priv->flags);
+               mutex_unlock(&pkey_mutex);
+       }
+
+       xve_mcast_stop_thread(dev, flush);
+       xve_mcast_dev_flush(dev);
+
+       xve_flush_paths(dev);
+
+       return 0;
+}
+
+static int recvs_pending(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int pending = 0;
+       int i;
+
+       for (i = 0; i < xve_recvq_size; ++i)
+               if (priv->rx_ring[i].skb)
+                       ++pending;
+
+       return pending;
+}
+
+void xve_drain_cq(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int n, done = 0;
+
+       /*
+        * We call completion handling routines that expect to be
+        * called from the BH-disabled NAPI poll context, so disable
+        * BHs here too.
+        */
+       local_bh_disable();
+
+       do {
+               n = poll_rx(priv, XVE_NUM_WC, &done, 1);
+       } while (n == XVE_NUM_WC);
+
+       poll_tx(priv);
+       local_bh_enable();
+}
+
+int xve_ib_dev_open(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret;
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s Open ib_dev\n", __func__);
+       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) {
+               xve_warn(priv, "%s P_Key 0x%04x not found\n", __func__,
+                        priv->pkey);
+               clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+               return -1;
+       }
+       set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+
+       ret = xve_init_qp(dev);
+       if (ret != 0) {
+               xve_warn(priv, "%s xve_init_qp returned %d\n", __func__, ret);
+               return -1;
+       }
+
+       ret = xve_ib_post_receives(dev);
+       if (ret != 0) {
+               xve_warn(priv, "%s xve_ib_post_receives returned %d\n",
+                        __func__, ret);
+               xve_ib_dev_stop(dev, 1);
+               return -1;
+       }
+
+       ret = xve_cm_dev_open(dev);
+       if (ret != 0) {
+               xve_warn(priv, "%s xve_cm_dev_open returned %d\n", __func__,
+                        ret);
+               xve_ib_dev_stop(dev, 1);
+               return -1;
+       }
+
+       clear_bit(XVE_STOP_REAPER, &priv->flags);
+       xve_queue_dwork(priv, XVE_WQ_START_AHREAP,
+                       3 * round_jiffies_relative(HZ));
+
+       if (!test_and_set_bit(XVE_FLAG_INITIALIZED, &priv->flags))
+               napi_enable(&priv->napi);
+
+       /* Set IB Dev to open */
+       set_bit(XVE_IB_DEV_OPEN, &priv->flags);
+
+       return 0;
+}
+
+int xve_ib_dev_stop(struct net_device *dev, int flush)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_attr qp_attr;
+       unsigned long begin;
+       struct xve_tx_buf *tx_req;
+       int i;
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s Stop ib_dev\n", __func__);
+       /* IB Dev stop */
+       if (!test_and_clear_bit(XVE_IB_DEV_OPEN, &priv->flags)) {
+               xve_debug(DEBUG_IBDEV_INFO, priv,
+                         "%s Stop IB without being up\n", __func__);
+               return 0;
+       }
+
+       if (test_and_clear_bit(XVE_FLAG_INITIALIZED, &priv->flags))
+               napi_disable(&priv->napi);
+
+       xve_cm_dev_stop(dev);
+
+       /*
+        * Move our QP to the error state and then reinitialize it
+        * once all work requests have completed or have been flushed.
+        */
+       qp_attr.qp_state = IB_QPS_ERR;
+       if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+               xve_warn(priv, "Failed to modify QP to ERROR state\n");
+
+       /* Wait for all sends and receives to complete */
+       begin = jiffies;
+
+       while (priv->tx_head != priv->tx_tail || recvs_pending(dev)) {
+               if (time_after(jiffies, begin + 5 * HZ)) {
+                       xve_warn(priv,
+                                "%s timing out; %d sends %d receives not completed\n",
+                                __func__, priv->tx_head - priv->tx_tail,
+                                recvs_pending(dev));
+
+                       /*
+                        * assume the HW is wedged and just free up
+                        * all our pending work requests.
+                        */
+                       while ((int)priv->tx_tail - (int)priv->tx_head < 0) {
+                               tx_req = &priv->tx_ring[priv->tx_tail &
+                                                       (xve_sendq_size - 1)];
+                               xve_free_txbuf_memory(priv, tx_req);
+                               ++priv->tx_tail;
+                               --priv->tx_outstanding;
+                       }
+
+                       for (i = 0; i < xve_recvq_size; ++i) {
+                               struct xve_rx_buf *rx_req;
+
+                               rx_req = &priv->rx_ring[i];
+                               if (!rx_req->skb)
+                                       continue;
+                               xve_ud_dma_unmap_rx(priv,
+                                                   priv->rx_ring[i].mapping);
+                               xve_dev_kfree_skb_any(priv, rx_req->skb, 0);
+                               rx_req->skb = NULL;
+                       }
+
+                       goto timeout;
+               }
+               xve_drain_cq(dev);
+               msleep(20);
+       }
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s All sends and receives done\n",
+                 __func__);
+timeout:
+       qp_attr.qp_state = IB_QPS_RESET;
+       if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+               xve_warn(priv, "Failed to modify QP to RESET state\n");
+
+       /* Wait for all AHs to be reaped */
+       set_bit(XVE_STOP_REAPER, &priv->flags);
+       xve_ah_dev_cleanup(dev);
+
+       ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
+
+       return 0;
+}
+
+int xve_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       priv->ca = ca;
+       priv->port = port;
+       priv->qp = NULL;
+
+       if (xve_transport_dev_init(dev, ca) != 0) {
+               pr_warn("%s: xve_transport_dev_init failed for %s\n",
+                       ca->name, priv->xve_name);
+               return -ENODEV;
+       }
+
+       if (dev->flags & IFF_UP) {
+               if (xve_ib_dev_open(dev) != 0) {
+                       xve_transport_dev_cleanup(dev);
+                       return -ENODEV;
+               }
+       }
+
+       return 0;
+}
+
+static void __xve_ib_dev_flush(struct xve_dev_priv *priv,
+                              enum xve_flush_level level)
+{
+       struct net_device *dev = priv->netdev;
+       u16 new_index;
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (!test_bit(XVE_FLAG_INITIALIZED, &priv->flags) ||
+           !test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)) {
+               xve_debug(DEBUG_IBDEV_INFO, priv,
+                         "%s Not flushing: XVE_FLAG_ADMIN_UP/XVE_FLAG_INITIALIZED not set, flags %lx\n",
+                         __func__, priv->flags);
+               goto out;
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (level == XVE_FLUSH_HEAVY) {
+               if (ib_find_pkey(priv->ca, priv->port, priv->pkey,
+                       &new_index)) {
+                       clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+                       xve_ib_dev_down(dev, 0);
+                       xve_ib_dev_stop(dev, 0);
+                       if (xve_pkey_dev_delay_open(dev))
+                               return;
+               }
+
+               /* restart QP only if P_Key index is changed */
+               if (test_and_set_bit(XVE_PKEY_ASSIGNED, &priv->flags) &&
+                   new_index == priv->pkey_index) {
+                       xve_debug(DEBUG_IBDEV_INFO, priv,
+                                 "%s PKey index not changed\n", __func__);
+                       return;
+               }
+               priv->pkey_index = new_index;
+       }
+
+       if (level == XVE_FLUSH_LIGHT) {
+               xve_mark_paths_invalid(dev);
+               xve_mcast_dev_flush(dev);
+               clear_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+       }
+
+       if (level >= XVE_FLUSH_NORMAL)
+               xve_ib_dev_down(dev, 0);
+
+       if (level == XVE_FLUSH_HEAVY) {
+               xve_ib_dev_stop(dev, 0);
+               xve_ib_dev_open(dev);
+       }
+       spin_lock_irqsave(&priv->lock, flags);
+       set_bit(XVE_FLAG_IB_EVENT, &priv->flags);
+out:
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void xve_ib_dev_flush_light(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHLIGHT, 0);
+
+       __xve_ib_dev_flush(priv, XVE_FLUSH_LIGHT);
+       xve_put_ctx(priv);
+}
+
+void xve_ib_dev_flush_normal(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHNORMAL, 0);
+
+       __xve_ib_dev_flush(priv, XVE_FLUSH_NORMAL);
+       xve_put_ctx(priv);
+}
+
+void xve_ib_dev_flush_heavy(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_FLUSHHEAVY, 0);
+
+       __xve_ib_dev_flush(priv, XVE_FLUSH_HEAVY);
+       xve_put_ctx(priv);
+}
+
+void xve_ib_dev_cleanup(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_debug(DEBUG_IBDEV_INFO, priv, "%s Cleaning up ib_dev\n", __func__);
+
+       xve_mcast_stop_thread(dev, 1);
+       xve_mcast_dev_flush(dev);
+       xve_ah_dev_cleanup(dev);
+       xve_transport_dev_cleanup(dev);
+}
+
+/*
+ * Delayed P_Key Assignment Interim Support
+ *
+ * The following is an initial implementation of the delayed P_Key
+ * assignment mechanism, using the same approach as the multicast
+ * group join. Its single goal is to quickly address Bug #2507; it
+ * will probably be removed once the P_Key change async notification
+ * is available.
+ */
+
+void xve_pkey_poll(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_PKEYPOLL, 1);
+       struct net_device *dev = priv->netdev;
+
+       xve_pkey_dev_check_presence(dev);
+
+       if (test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+               xve_open(dev);
+       else {
+               mutex_lock(&pkey_mutex);
+               if (!test_bit(XVE_PKEY_STOP, &priv->flags))
+                       xve_queue_dwork(priv, XVE_WQ_START_PKEYPOLL, HZ);
+               mutex_unlock(&pkey_mutex);
+       }
+       xve_put_ctx(priv);
+}
+
+int xve_pkey_dev_delay_open(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       /*
+        * Look for the interface pkey value in the IB port P_Key table
+        * and set the interface pkey assignment flag.
+        */
+       xve_pkey_dev_check_presence(dev);
+
+       /* P_Key value not assigned yet - start polling */
+       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags)) {
+               mutex_lock(&pkey_mutex);
+               clear_bit(XVE_PKEY_STOP, &priv->flags);
+               xve_queue_dwork(priv, XVE_WQ_START_PKEYPOLL, HZ);
+               mutex_unlock(&pkey_mutex);
+               return 1;
+       }
+
+       return 0;
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_main.c b/drivers/infiniband/ulp/xsigo/xve/xve_main.c
new file mode 100644 (file)
index 0000000..dfcc700
--- /dev/null
@@ -0,0 +1,2471 @@
+/*
+ * Copyright (c) 2011 Xsigo Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#ifndef XSIGO_LOCAL_VERSION
+#define DRIVER_VERSION "0.31"
+#else
+#define DRIVER_VERSION XSIGO_LOCAL_VERSION
+#endif
+
+static int xve_xsmp_service_id = -1;
+struct mutex xve_mutex;
+static spinlock_t xve_lock;
+u32 xve_counters[XVE_MAX_GLOB_COUNTERS];
+
+MODULE_AUTHOR("Oracle corp (OVN-linux-drivers@oracle.com)");
+MODULE_DESCRIPTION("OVN Virtual Ethernet driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRIVER_VERSION);
+
+int xve_sendq_size __read_mostly = XVE_TX_RING_SIZE;
+int xve_recvq_size __read_mostly = XVE_RX_RING_SIZE;
+module_param_named(send_queue_size, xve_sendq_size, int, 0444);
+MODULE_PARM_DESC(send_queue_size, "Number of descriptors in send queue");
+module_param_named(recv_queue_size, xve_recvq_size, int, 0444);
+MODULE_PARM_DESC(recv_queue_size, "Number of descriptors in receive queue");
+
+static int napi_weight = 128;
+module_param(napi_weight, int, 0644);
+
+static int xve_no_tx_checksum_offload;
+module_param(xve_no_tx_checksum_offload, int, 0644);
+
+int lro;
+/* the variable is an int, so the parameter type must be int as well */
+module_param(lro, int, 0444);
+MODULE_PARM_DESC(lro, "Enable LRO (Large Receive Offload)");
+
+static int lro_max_aggr = XVE_LRO_MAX_AGGR;
+module_param(lro_max_aggr, int, 0644);
+MODULE_PARM_DESC(lro_max_aggr,
+                "LRO: Max packets to be aggregated (default = 64)");
+
+static int xve_hbeat_enable;
+module_param(xve_hbeat_enable, int, 0644);
+MODULE_PARM_DESC(xve_hbeat_enable, "Enable/Disable heartbeat");
+
+static int xve_aging_timeout = 5 * 60;
+module_param(xve_aging_timeout, int, 0644);
+MODULE_PARM_DESC(xve_aging_timeout, "Aging timeout in seconds");
+
+static int xve_flood_rc = 1;
+module_param(xve_flood_rc, int, 0644);
+MODULE_PARM_DESC(xve_flood_rc, "Enable/Disable flood mode for RC");
+
+int xve_debug_level;
+module_param_named(xve_debug_level, xve_debug_level, int, 0644);
+MODULE_PARM_DESC(xve_debug_level, "Enable debug tracing");
+
+int xve_cm_single_qp;
+module_param_named(xve_cm_single_qp, xve_cm_single_qp, int, 0644);
+
+int xve_mc_sendonly_timeout;
+module_param_named(mc_sendonly_timeout, xve_mc_sendonly_timeout, int, 0644);
+MODULE_PARM_DESC(mc_sendonly_timeout, "Timeout for send-only multicast joins");
+
+int xve_do_arp = 1;
+module_param_named(do_arp, xve_do_arp, int, 0644);
+MODULE_PARM_DESC(do_arp, "Enable/Disable ARP for NIC MTU less than IB-MTU");
+
+static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data,
+                                  int len);
+
+struct xve_path_iter {
+       struct net_device *dev;
+       struct xve_path path;
+};
+
+static const u8 bcast_mgid[] = {
+       0xff, 0x12, 0x40, 0x1c, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff
+};
+
+static u8 ipv6_dmac_addr[] = {
+       0xff, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+       0x00, 0x00, 0x00, 0x01, 0xff, 0x00, 0x00, 0x00
+};
+
+struct workqueue_struct *xve_workqueue;
+struct workqueue_struct *xve_taskqueue;
+
+struct ib_sa_client xve_sa_client;
+
+struct list_head xve_dev_list;
+
+static inline int xve_esx_preregister_setup(struct net_device *netdev)
+{
+       xg_preregister_pseudo_device(netdev);
+       return 0;
+}
+
+static inline int xve_esx_postregister_setup(struct net_device *netdev)
+{
+       return 0;
+}
+
+static inline void vmk_notify_uplink(struct net_device *netdev)
+{
+}
+
+int xve_open(struct net_device *netdev)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       unsigned long flags = 0;
+
+       pr_info("XVE: %s Bringing interface up %s\n", __func__, priv->xve_name);
+       priv->counters[XVE_OPEN_COUNTER]++;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       set_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+       set_bit(XVE_OPER_UP, &priv->state);
+       set_bit(XVE_OS_ADMIN_UP, &priv->state);
+       priv->port_speed = xve_calc_speed(priv);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (xve_pkey_dev_delay_open(netdev))
+               return 0;
+
+       if (xve_ib_dev_open(netdev))
+               goto err_disable;
+
+       if (xve_ib_dev_up(netdev))
+               goto err_stop;
+
+       queue_age_work(priv, 0);
+
+       return 0;
+
+err_stop:
+       xve_ib_dev_stop(netdev, 1);
+
+err_disable:
+       clear_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+
+       return -EINVAL;
+}
+
+static int xve_stop(struct net_device *netdev)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       unsigned long flags = 0;
+
+       pr_info("XVE: %s Stopping interface %s\n", __func__, priv->xve_name);
+
+       spin_lock_irqsave(&priv->lock, flags);
+       clear_bit(XVE_FLAG_ADMIN_UP, &priv->flags);
+       clear_bit(XVE_OPER_UP, &priv->state);
+       clear_bit(XVE_OS_ADMIN_UP, &priv->state);
+       handle_carrier_state(priv, 0);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       xve_ib_dev_down(netdev, 0);
+       xve_ib_dev_stop(netdev, 0);
+
+       pr_info("XVE: %s Finished Stopping interface %s\n", __func__,
+               priv->xve_name);
+       return 0;
+}
+
+int xve_modify_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+
+       pr_info("XVE: %s changing mtu from %d to %d\n",
+               priv->xve_name, priv->admin_mtu, new_mtu);
+       if (new_mtu == netdev->mtu)
+               return 0;
+
+       /* dev->mtu > 2K ==> connected mode */
+       if (xve_cm_admin_enabled(netdev)) {
+               if (new_mtu > xve_cm_max_mtu(netdev))
+                       return -EINVAL;
+
+               netdev->mtu = new_mtu;
+               return 0;
+       }
+
+       if (new_mtu > XVE_UD_MTU(priv->max_ib_mtu))
+               return -EINVAL;
+
+       priv->admin_mtu = new_mtu;
+       netdev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
+       xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+       (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl, priv->resource_id);
+
+       return 0;
+}
+
+static int xve_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       return xve_modify_mtu(netdev, new_mtu);
+}
+
+static int xve_set_mac_address(struct net_device *dev, void *p)
+{
+       struct sockaddr *addr = p;
+
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EINVAL;
+       memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+       return 0;
+}
+
+static struct net_device_stats *xve_get_stats(struct net_device *netdev)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+
+       priv->counters[XVE_GETSTATS_COUNTER]++;
+       return &priv->netdev->stats;
+}
+
+static int xve_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+       struct mii_ioctl_data *data = if_mii(ifr);
+       int ret = 0;
+       struct xve_dev_priv *priv;
+
+       if (!netdev || !netif_running(netdev))
+               return -EAGAIN;
+
+       priv = netdev_priv(netdev);
+       priv->counters[XVE_IOCTL_COUNTER]++;
+
+       switch (cmd) {
+       case SIOCGMIIPHY:
+               data->phy_id = 5;
+               break;
+       case SIOCGMIIREG:
+               /*
+                * Mainly used by the mii monitor: reg 0 (BMCR) reports
+                * a fixed 100 Mb/s full-duplex PHY, reg 1 (BMSR) mirrors
+                * the carrier state in its link-status bit.
+                */
+               switch (data->reg_num) {
+               case 0:
+                       data->val_out = 0x2100;
+                       break;
+               case 1:
+                       data->val_out = 0xfe00 |
+                           (netif_carrier_ok(netdev) << 2);
+                       break;
+               default:
+                       break;
+               }
+               break;
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+       return ret;
+}
+
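+/*
+ * Paths are kept in an rb_tree keyed by destination GID (raw memcmp
+ * order), giving O(log n) lookup on the transmit path.
+ */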
+struct xve_path *__path_find(struct net_device *netdev, void *gid)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       struct rb_node *n = priv->path_tree.rb_node;
+       struct xve_path *path;
+       int ret;
+
+       while (n) {
+               path = rb_entry(n, struct xve_path, rb_node);
+
+               ret = memcmp(gid, path->pathrec.dgid.raw, sizeof(union ib_gid));
+
+               if (ret < 0)
+                       n = n->rb_left;
+               else if (ret > 0)
+                       n = n->rb_right;
+               else
+                       return path;
+       }
+
+       return NULL;
+}
+
+static int __path_add(struct net_device *netdev, struct xve_path *path)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       struct rb_node **n = &priv->path_tree.rb_node;
+       struct rb_node *pn = NULL;
+       struct xve_path *tpath;
+       int ret;
+
+       while (*n) {
+               pn = *n;
+               tpath = rb_entry(pn, struct xve_path, rb_node);
+
+               ret = memcmp(path->pathrec.dgid.raw, tpath->pathrec.dgid.raw,
+                            sizeof(union ib_gid));
+               if (ret < 0)
+                       n = &pn->rb_left;
+               else if (ret > 0)
+                       n = &pn->rb_right;
+               else
+                       return -EEXIST;
+       }
+
+       rb_link_node(&path->rb_node, pn, n);
+       rb_insert_color(&path->rb_node, &priv->path_tree);
+
+       list_add_tail(&path->list, &priv->path_list);
+
+       return 0;
+}
+
+void xve_flush_l2_entries(struct net_device *netdev, struct xve_path *path,
+                         int do_lock)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       struct xve_fwt_entry *fwt_entry, *tn;
+       unsigned long flags = 0;
+
+       if (do_lock)
+               spin_lock_irqsave(&priv->lock, flags);
+
+       list_for_each_entry_safe(fwt_entry, tn, &path->fwt_list, list)
+               xve_fwt_entry_destroy(priv, fwt_entry);
+
+       if (do_lock)
+               spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static void path_free(struct net_device *netdev, struct xve_path *path)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       while ((skb = __skb_dequeue(&path->queue)))
+               dev_kfree_skb_irq(skb);
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (xve_cmtx_get(path)) {
+               spin_unlock_irqrestore(&priv->lock, flags);
+               xve_cm_destroy_tx_deferred(xve_cmtx_get(path));
+               spin_lock_irqsave(&priv->lock, flags);
+       }
+       xve_flush_l2_entries(netdev, path, 0);
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (path->ah)
+               xve_put_ah(path->ah);
+       kfree(path);
+}
+
+/*
+ * Called with priv->lock held
+ */
+static void xve_flood_all_paths(struct net_device *dev, struct sk_buff *skb)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_path *path;
+       struct sk_buff *nskb;
+
+       list_for_each_entry(path, &priv->path_list, list) {
+               if (xve_cmtx_get(path) && xve_cm_up(path)) {
+                       nskb = skb_clone(skb, GFP_ATOMIC);
+                       if (nskb)
+                               xve_cm_send(dev, nskb, xve_cmtx_get(path));
+               }
+       }
+}
+
+void xve_mark_paths_invalid(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_path *path, *tp;
+
+       spin_lock_irq(&priv->lock);
+
+       list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+               xve_debug(DEBUG_IBDEV_INFO, priv,
+                         "%s mark path LID 0x%04x GID %pI6 invalid\n",
+                         __func__, be16_to_cpu(path->pathrec.dlid),
+                         path->pathrec.dgid.raw);
+               path->valid = 0;
+       }
+
+       spin_unlock_irq(&priv->lock);
+}
+
+void xve_flush_paths(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_path *path, *tp;
+
+       list_for_each_entry_safe(path, tp, &priv->path_list, list) {
+               xve_flush_single_path(dev, path);
+       }
+}
+
+void xve_flush_single_path_by_gid(struct net_device *dev, union ib_gid *gid)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags = 0;
+       struct xve_path *path;
+
+       netif_tx_lock_bh(dev);
+       spin_lock_irqsave(&priv->lock, flags);
+
+       path = __path_find(dev, gid->raw);
+       if (!path) {
+               char *mgid_token = gid->raw;
+               char tmp_buf[64];
+
+               xve_debug(DEBUG_FLUSH_INFO, priv, "%s Path not found\n",
+                         __func__);
+               print_mgid_buf(tmp_buf, mgid_token);
+               xve_debug(DEBUG_FLUSH_INFO, priv, "%s MGID %s\n",
+                         __func__, tmp_buf);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               netif_tx_unlock_bh(dev);
+               return;
+       }
+
+       xve_debug(DEBUG_FLUSH_INFO, priv, "%s Flushing the path %p\n",
+                 __func__, path);
+       rb_erase(&path->rb_node, &priv->path_tree);
+       if (path->query)
+               ib_sa_cancel_query(path->query_id, path->query);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       netif_tx_unlock_bh(dev);
+
+       wait_for_completion(&path->done);
+       list_del(&path->list);
+       path_free(dev, path);
+}
+
+void xve_flush_single_path(struct net_device *dev, struct xve_path *path)
+{
+       xve_flush_single_path_by_gid(dev, &path->pathrec.dgid);
+}
+
+static void path_rec_completion(int status,
+                               struct ib_sa_path_rec *pathrec, void *path_ptr)
+{
+       struct xve_path *path = path_ptr;
+       struct net_device *dev = path->dev;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_ah *ah = NULL;
+       struct xve_ah *old_ah = NULL;
+       struct sk_buff_head skqueue;
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       if (!status) {
+               priv->counters[XVE_PATHREC_RESP_COUNTER]++;
+               xve_test("XVE: %s PathRec LID 0x%04x for GID %pI6\n",
+                        __func__, be16_to_cpu(pathrec->dlid),
+                        pathrec->dgid.raw);
+       } else {
+               priv->counters[XVE_PATHREC_RESP_ERR_COUNTER]++;
+               xve_test("XVE: %s PathRec status %d for GID %pI6\n",
+                        __func__, status, path->pathrec.dgid.raw);
+       }
+
+       skb_queue_head_init(&skqueue);
+
+       if (!status) {
+               struct ib_ah_attr av;
+
+               if (!ib_init_ah_from_path(priv->ca, priv->port, pathrec, &av)) {
+                       av.ah_flags = IB_AH_GRH;
+                       ah = xve_create_ah(dev, priv->pd, &av);
+               }
+       }
+
+       spin_lock_irqsave(&priv->lock, flags);
+
+       if (ah) {
+               path->pathrec = *pathrec;
+               old_ah = path->ah;
+               path->ah = ah;
+
+               xve_test
+                   ("XVE: %screated address handle %p for LID 0x%04x, SL %d\n",
+                    __func__, ah, be16_to_cpu(pathrec->dlid), pathrec->sl);
+               if (xve_cm_enabled(dev)) {
+                       if (!xve_cmtx_get(path))
+                               xve_cm_create_tx(dev, path);
+               }
+
+               while ((skb = __skb_dequeue(&path->queue)))
+                       __skb_queue_tail(&skqueue, skb);
+               path->valid = 1;
+       }
+
+       path->query = NULL;
+       complete(&path->done);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (old_ah)
+               xve_put_ah(old_ah);
+
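+       /*
+        * Re-inject the packets that were queued while the path was
+        * unresolved; they retraverse the stack with a valid AH in place.
+        */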
+       while ((skb = __skb_dequeue(&skqueue))) {
+               skb->dev = dev;
+               if (dev_queue_xmit(skb)) {
+                       xve_warn(priv,
+                                "dev_queue_xmit failed to requeue pkt for %s\n",
+                                priv->xve_name);
+               } else {
+                       xve_test("%s Successfully completed path for %s\n",
+                                __func__, priv->xve_name);
+               }
+       }
+}
+
+static struct xve_path *path_rec_create(struct net_device *dev, void *gid)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_path *path;
+
+       if (!priv->broadcast)
+               return NULL;
+
+       path = kzalloc(sizeof(*path), GFP_ATOMIC);
+       if (!path)
+               return NULL;
+
+       path->dev = dev;
+
+       skb_queue_head_init(&path->queue);
+
+       INIT_LIST_HEAD(&path->fwt_list);
+
+       memcpy(path->pathrec.dgid.raw, gid, sizeof(union ib_gid));
+       path->pathrec.sgid = priv->local_gid;
+       path->pathrec.pkey = cpu_to_be16(priv->pkey);
+       path->pathrec.numb_path = 1;
+       path->pathrec.traffic_class = priv->broadcast->mcmember.traffic_class;
+
+       return path;
+}
+
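+/*
+ * Start an asynchronous SA path record query for this path. The MTU
+ * selector asks for a path whose MTU is greater than the device MTU;
+ * anything above 2K falls back to a fully wildcarded query.
+ */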
+static int path_rec_start(struct net_device *dev, struct xve_path *path)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       ib_sa_comp_mask comp_mask =
+           IB_SA_PATH_REC_MTU_SELECTOR | IB_SA_PATH_REC_MTU;
+       struct ib_sa_path_rec p_rec;
+
+       p_rec = path->pathrec;
+       p_rec.mtu_selector = IB_SA_GT;
+
+       switch (roundup_pow_of_two(dev->mtu + VLAN_ETH_HLEN)) {
+       case 512:
+               p_rec.mtu = IB_MTU_256;
+               break;
+       case 1024:
+               p_rec.mtu = IB_MTU_512;
+               break;
+       case 2048:
+               p_rec.mtu = IB_MTU_1024;
+               break;
+       default:
+               /* Wildcard everything */
+               comp_mask = 0;
+               p_rec.mtu = 0;
+               p_rec.mtu_selector = 0;
+       }
+
+       xve_dbg_mcast(priv, "%s Start path record lookup for %pI6 MTU > %d\n",
+                     __func__, p_rec.dgid.raw,
+                     comp_mask ? ib_mtu_enum_to_int(p_rec.mtu) : 0);
+
+       init_completion(&path->done);
+
+       path->query_id =
+           ib_sa_path_rec_get(&xve_sa_client, priv->ca, priv->port,
+                              &p_rec, comp_mask |
+                              IB_SA_PATH_REC_DGID |
+                              IB_SA_PATH_REC_SGID |
+                              IB_SA_PATH_REC_NUMB_PATH |
+                              IB_SA_PATH_REC_TRAFFIC_CLASS |
+                              IB_SA_PATH_REC_PKEY,
+                              1000, GFP_ATOMIC,
+                              path_rec_completion, path, &path->query);
+       if (path->query_id < 0) {
+               xve_warn(priv, "ib_sa_path_rec_get failed: %d for %s\n",
+                        path->query_id, priv->xve_name);
+               path->query = NULL;
+               complete(&path->done);
+               return path->query_id;
+       }
+       priv->counters[XVE_PATHREC_QUERY_COUNTER]++;
+       return 0;
+}
+
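+/*
+ * Bind a forwarding-table entry to a path: find (or create) the path
+ * for the entry's destination GID, link the entry onto the path's
+ * fwt_list, and start a path record query if no address handle exists
+ * yet. *ok reports whether the lookup succeeded.
+ */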
+static void xve_path_lookup(struct sk_buff *skb, struct net_device *dev,
+                           struct xve_fwt_entry *fwt_entry, int *ok)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       struct xve_path *path;
+       unsigned long flags = 0;
+
+       path = __path_find(dev, fwt_entry->dgid.raw);
+       if (!path) {
+               xve_debug(DEBUG_TABLE_INFO, priv, "%s Unable to find path\n",
+                         __func__);
+               path = path_rec_create(dev, fwt_entry->dgid.raw);
+               if (!path)
+                       goto err_drop;
+               __path_add(dev, path);
+       }
+
+       xve_debug(DEBUG_TABLE_INFO, priv, "%s Adding  FWT to list %p\n",
+                 __func__, fwt_entry);
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       fwt_entry->path = path;
+       list_add_tail(&fwt_entry->list, &path->fwt_list);
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+       if (!path->ah) {
+               if (!path->query && path_rec_start(dev, path))
+                       goto err_drop;
+       }
+       *ok = 1;
+       return;
+err_drop:
+       *ok = 0;
+}
+
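+/*
+ * Main transmit entry point. Unicast packets with a known L2 entry go
+ * out over the RC connection (connected mode) or the UD address handle;
+ * unknown unicast and multicast frames are sent over the broadcast
+ * MGID. Packets for paths still being resolved are queued on the path.
+ */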
+static int xve_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       struct sk_buff *bcast_skb = NULL;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_fwt_entry *fwt_entry = NULL;
+       struct xve_path *path;
+       unsigned long flags;
+       int ret = NETDEV_TX_OK, len = 0;
+       char *smac;
+       u8 skb_need_tofree = 0, inc_drop_cnt = 0, queued_pkt = 0;
+       u16 vlan_tag = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (!test_bit(XVE_OPER_UP, &priv->state)) {
+               ret = NETDEV_TX_BUSY;
+               inc_drop_cnt = 1;
+               priv->counters[XVE_TX_DROP_OPER_DOWN_COUNT]++;
+               goto unlock;
+       }
+
+       if (skb->len < XVE_MIN_PACKET_LEN) {
+               priv->counters[XVE_SHORT_PKT_COUNTER]++;
+               if (skb_padto(skb, XVE_MIN_PACKET_LEN)) {
+                       /*
+                        * skb_padto() frees the skb on failure, so return
+                        * NETDEV_TX_OK to keep the stack from requeueing it
+                        */
+                       inc_drop_cnt = 1;
+                       priv->counters[XVE_TX_SKB_ALLOC_ERROR_COUNTER]++;
+                       ret = NETDEV_TX_OK;
+                       goto unlock;
+               }
+               }
+               skb->len = XVE_MIN_PACKET_LEN;
+       }
+
+       len = skb->len;
+       smac = skb->data + ETH_ALEN;
+
+       if (xg_vlan_tx_tag_present(skb))
+               vlan_get_tag(skb, &vlan_tag);
+
+       fwt_entry = xve_fwt_lookup(&priv->xve_fwt, skb->data, vlan_tag, 0);
+       if (!fwt_entry) {
+               if (is_multicast_ether_addr(skb->data)) {
+                       xve_mcast_send(dev, (void *)priv->bcast_mgid.raw, skb);
+                       priv->counters[XVE_TX_MCAST_PKT]++;
+                       goto stats;
+               } else {
+                       /*
+                        * XXX Viswa: Need to change this.
+                        * Since this is a unicast packet and we do not have
+                        * an L2 table entry, we need to do the following:
+                        * if the packet is less than the IB MTU, flood it;
+                        * if it is more than the IB MTU, send it to all
+                        * ports. We still generate ARP and NDP queries for
+                        * IPv4 and IPv6 respectively.
+                        */
+
+                       /*
+                        * Do not generate an ARP/NDP query if the user does
+                        * not want one for packets smaller than the IB MTU.
+                        */
+                       if (xve_do_arp ||
+                           (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu)))
+                               bcast_skb = xve_generate_query(priv, skb);
+                       if (bcast_skb != NULL)
+                               xve_mcast_send(dev,
+                                              (void *)priv->bcast_mgid.raw,
+                                              bcast_skb);
+                       /*
+                        * Now send the original packet out over broadcast
+                        * as well. Later, add counters for flood mode.
+                        */
+                       if (len < XVE_UD_MTU(priv->max_ib_mtu)) {
+                               xve_mcast_send(dev,
+                                              (void *)priv->bcast_mgid.raw,
+                                              skb);
+                               priv->counters[XVE_TX_MCAST_FLOOD_UD]++;
+                       } else {
+                               if (xve_flood_rc) {
+                                       xve_flood_all_paths(dev, skb);
+                                       priv->counters[XVE_TX_MCAST_FLOOD_RC]++;
+                                       /*
+                                        * Free the original skb
+                                        */
+                                       skb_need_tofree = 1;
+                               } else {
+                                       skb_need_tofree = 1;
+                                       goto unlock;
+                               }
+                       }
+                       goto stats;
+               }
+       }
+
+       if (!fwt_entry->path) {
+               int ok;
+
+               priv->counters[XVE_PATH_NOT_FOUND]++;
+               xve_debug(DEBUG_SEND_INFO, priv,
+                         "%s Unable to find neigbour doing a path lookup\n",
+                         __func__);
+               xve_path_lookup(skb, dev, fwt_entry, &ok);
+               if (!ok) {
+                       skb_need_tofree = inc_drop_cnt = 1;
+                       goto free_fwt_ctx;
+               }
+       } else {
+               path = fwt_entry->path;
+               if (!path->ah) {
+                       priv->counters[XVE_AH_NOT_FOUND]++;
+                       xve_debug(DEBUG_SEND_INFO, priv,
+                                 "%s Path present %p no ah\n", __func__,
+                                 fwt_entry->path);
+                       if (!path->query && path_rec_start(dev, path)) {
+                               skb_need_tofree = inc_drop_cnt = 1;
+                               goto free_fwt_ctx;
+                       }
+               }
+       }
+
+       path = fwt_entry->path;
+
+       if (xve_cmtx_get(path)) {
+               if (xve_cm_up(path)) {
+                       xve_cm_send(dev, skb, xve_cmtx_get(path));
+                       update_cm_tx_rate(xve_cmtx_get(path), len);
+                       priv->counters[XVE_TX_RC_COUNTER]++;
+                       goto stats;
+               }
+       } else if (path->ah) {
+               xve_debug(DEBUG_SEND_INFO, priv, "%s path ah is %p\n",
+                         __func__, path->ah);
+               xve_send(dev, skb, path->ah, fwt_entry->dqpn);
+               priv->counters[XVE_TX_UD_COUNTER]++;
+               goto stats;
+       }
+
+       if (skb_queue_len(&path->queue) < XVE_MAX_PATH_REC_QUEUE) {
+               priv->counters[XVE_TX_QUEUE_PKT]++;
+               __skb_queue_tail(&path->queue, skb);
+               queued_pkt = 1;
+       } else {
+               xve_debug(DEBUG_SEND_INFO, priv,
+                         "%s Dropping packets path %p fwt_entry %p\n",
+                         __func__, path, fwt_entry);
+               skb_need_tofree = inc_drop_cnt = 1;
+               goto free_fwt_ctx;
+       }
+stats:
+       INC_TX_PKT_STATS(priv, dev);
+       INC_TX_BYTE_STATS(priv, dev, len);
+       priv->counters[XVE_TX_COUNTER]++;
+free_fwt_ctx:
+       xve_fwt_put_ctx(&priv->xve_fwt, fwt_entry);
+unlock:
+       if (inc_drop_cnt)
+               INC_TX_DROP_STATS(priv, dev);
+
+       if (!queued_pkt)
+               dev->trans_start = jiffies;
+       if (skb_need_tofree)
+               dev_kfree_skb(skb);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+       return ret;
+}
+
+static void xve_timeout(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_warn(priv, "transmit timeout: latency %d msecs\n",
+                jiffies_to_msecs(jiffies - dev->trans_start));
+       xve_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n",
+                netif_queue_stopped(dev), priv->tx_head, priv->tx_tail);
+       priv->counters[XVE_WDOG_TIMEOUT_COUNTER]++;
+}
+
+static void xve_set_mcast_list(struct net_device *dev)
+{
+}
+
+int xve_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       /* Allocate RX/TX "rings" to hold queued skbs */
+       priv->rx_ring = kcalloc(xve_recvq_size, sizeof(*priv->rx_ring),
+                               GFP_KERNEL);
+       if (!priv->rx_ring) {
+               pr_warn("%s: failed to allocate RX ring (%d entries)\n",
+                       ca->name, xve_recvq_size);
+               goto out;
+       }
+
+       priv->tx_ring = vmalloc(xve_sendq_size * sizeof(*priv->tx_ring));
+       if (!priv->tx_ring) {
+               pr_warn("%s: failed to allocate TX ring (%d entries)\n",
+                       ca->name, xve_sendq_size);
+               goto out_rx_ring_cleanup;
+       }
+       memset(priv->tx_ring, 0, xve_sendq_size * sizeof(*priv->tx_ring));
+
+       /* priv->tx_head, tx_tail & tx_outstanding are already 0 */
+
+       if (xve_ib_dev_init(dev, ca, port) != 0) {
+               pr_err("%s Failed for %s\n", __func__, priv->xve_name);
+               goto out_tx_ring_cleanup;
+       }
+
+       return 0;
+
+out_tx_ring_cleanup:
+       vfree(priv->tx_ring);
+
+out_rx_ring_cleanup:
+       kfree(priv->rx_ring);
+
+out:
+       return -ENOMEM;
+}
+
+void xve_dev_cleanup(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_remove_proc_entry(priv);
+       xve_ib_dev_cleanup(dev);
+
+       kfree(priv->rx_ring);
+       vfree(priv->tx_ring);
+
+       priv->rx_ring = NULL;
+       priv->tx_ring = NULL;
+
+       xve_fwt_cleanup(priv);
+}
+
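+/*
+ * LRO callback: locate the IPv4 and TCP headers of a received skb so
+ * the LRO manager can aggregate it; returns -1 for anything that is
+ * not a complete TCP/IPv4 packet.
+ */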
+static int get_skb_hdr(struct sk_buff *skb, void **iphdr,
+                      void **tcph, u64 *hdr_flags, void *priv)
+{
+       unsigned int ip_len;
+       struct iphdr *iph;
+
+       if (unlikely(skb->protocol != htons(ETH_P_IP)))
+               return -1;
+
+       /* Check for non-TCP packet */
+       skb_reset_network_header(skb);
+       iph = ip_hdr(skb);
+       if (iph->protocol != IPPROTO_TCP)
+               return -1;
+
+       ip_len = ip_hdrlen(skb);
+       skb_set_transport_header(skb, ip_len);
+       *tcph = tcp_hdr(skb);
+
+       /* check if IP header and TCP header are complete */
+       if (ntohs(iph->tot_len) < ip_len + tcp_hdrlen(skb))
+               return -1;
+
+       *hdr_flags = LRO_IPV4 | LRO_TCP;
+       *iphdr = iph;
+
+       return 0;
+}
+
+static void xve_lro_setup(struct xve_dev_priv *priv)
+{
+       priv->lro.lro_mgr.max_aggr = lro_max_aggr;
+       priv->lro.lro_mgr.max_desc = XVE_MAX_LRO_DESCRIPTORS;
+       priv->lro.lro_mgr.lro_arr = priv->lro.lro_desc;
+       priv->lro.lro_mgr.get_skb_header = get_skb_hdr;
+       priv->lro.lro_mgr.features = LRO_F_NAPI;
+       priv->lro.lro_mgr.dev = priv->netdev;
+       priv->lro.lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
+}
+
+static struct net_device_ops xve_netdev_ops = {
+       .ndo_open = xve_open,
+       .ndo_stop = xve_stop,
+       .ndo_change_mtu = xve_change_mtu,
+       .ndo_set_mac_address = xve_set_mac_address,
+       .ndo_start_xmit = xve_start_xmit,
+       .ndo_tx_timeout = xve_timeout,
+       .ndo_set_rx_mode = xve_set_mcast_list,
+       .ndo_do_ioctl = xve_ioctl,
+       .ndo_get_stats = xve_get_stats,
+};
+
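+/*
+ * Take the interface operationally down: drop the carrier, clear the
+ * OPER/LINK state bits and notify the chassis of the state change.
+ */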
+static void xve_set_oper_down(struct xve_dev_priv *priv)
+{
+       if (test_and_clear_bit(XVE_OPER_UP, &priv->state)) {
+               handle_carrier_state(priv, 0);
+               clear_bit(XVE_OPER_REP_SENT, &priv->state);
+               clear_bit(XVE_PORT_LINK_UP, &priv->state);
+               clear_bit(XVE_OPER_UP, &priv->state);
+               xve_xsmp_send_oper_state(priv, priv->resource_id,
+                                        XSMP_XVE_OPER_DOWN);
+       }
+}
+
+static void xve_io_disconnect(struct xve_dev_priv *priv)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (test_bit(XVE_OPER_UP, &priv->state)) {
+               xve_set_oper_down(priv);
+               spin_unlock_irqrestore(&priv->lock, flags);
+               if (test_bit(XVE_OS_ADMIN_UP, &priv->state))
+                       napi_synchronize(&priv->napi);
+               pr_info("%s Flushing mcast [xve :%s]\n", __func__,
+                       priv->xve_name);
+               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+       } else {
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
+}
+
+void handle_carrier_state(struct xve_dev_priv *priv, char state)
+{
+       if (state) {
+               priv->jiffies = jiffies;
+               priv->counters[XVE_TX_WAKE_UP_COUNTER]++;
+               netif_carrier_on(priv->netdev);
+               netif_wake_queue(priv->netdev);
+               /* careful: we are holding the lock (priv->lock) inside this */
+               xve_data_recv_handler(priv);
+       } else {
+               netif_carrier_off(priv->netdev);
+               netif_stop_queue(priv->netdev);
+               priv->counters[XVE_TX_QUEUE_STOP_COUNTER]++;
+       }
+}
+
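+/*
+ * Build the query flooded for an unknown unicast destination: an ARP
+ * request for IPv4 frames, a Neighbor Solicitation for IPv6 frames;
+ * anything else returns NULL.
+ */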
+struct sk_buff *xve_generate_query(struct xve_dev_priv *priv,
+                                  struct sk_buff *skb)
+{
+       struct vlan_ethhdr *veth = (struct vlan_ethhdr *)(skb->data);
+
+       if ((xg_vlan_tx_tag_present(skb)
+            && veth->h_vlan_encapsulated_proto == htons(ETH_P_IP))
+           || skb->protocol == htons(ETH_P_IP))
+               return xve_create_arp(priv, skb);
+       if ((xg_vlan_tx_tag_present(skb)
+            && veth->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))
+           || skb->protocol == htons(ETH_P_IPV6))
+               return xve_create_ndp(priv, skb);
+
+       return NULL;
+}
+
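+/*
+ * Construct a broadcast ARP request from the original skb, with the
+ * original sender MAC/IP as the source and the packet's destination IP
+ * as the target (used when no L2 forwarding entry exists yet).
+ */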
+struct sk_buff *xve_create_arp(struct xve_dev_priv *priv,
+                              struct sk_buff *skb_pkt)
+{
+       struct sk_buff *skb;
+       struct arphdr *arp;
+       struct iphdr *iphdr;
+       unsigned char *arp_ptr, *eth_ptr;
+       struct net_device *netdev = priv->netdev;
+
+       skb = alloc_skb(XVE_MIN_PACKET_LEN, GFP_ATOMIC);
+       if (skb == NULL)
+               return NULL;
+
+       eth_ptr = (unsigned char *)skb_put(skb, XVE_MIN_PACKET_LEN);
+       /*
+        * Broadcast packet
+        */
+       memset(eth_ptr, 0xFF, ETH_ALEN);
+       eth_ptr += ETH_ALEN;
+       /*
+        * Copy the source MAC
+        */
+       memcpy(eth_ptr, skb_pkt->data + ETH_ALEN, ETH_ALEN);
+
+       eth_ptr += ETH_ALEN;
+
+       if (xg_vlan_tx_tag_present(skb_pkt)) {
+               u16 vlan_tci = 0;
+               struct vlan_ethhdr *veth;
+
+               vlan_get_tag(skb_pkt, &vlan_tci);
+               veth = (struct vlan_ethhdr *)(skb->data);
+               veth->h_vlan_proto = htons(ETH_P_8021Q);
+               /* now, the TCI */
+               veth->h_vlan_TCI = htons(vlan_tci);
+               eth_ptr += VLAN_HLEN;
+               priv->counters[XVE_TX_MCAST_ARP_VLAN_QUERY]++;
+       }
+
+       *eth_ptr++ = (ETH_P_ARP >> 8) & 0xff;
+       *eth_ptr++ = ETH_P_ARP & 0xff;
+
+       arp = (struct arphdr *)eth_ptr;
+       arp->ar_hrd = htons(netdev->type);
+       arp->ar_pro = htons(ETH_P_IP);
+       arp->ar_hln = netdev->addr_len;
+       arp->ar_pln = 4;
+       arp->ar_op = htons(ARPOP_REQUEST);
+
+       iphdr = (struct iphdr *)ip_hdr(skb_pkt);
+       arp_ptr = (unsigned char *)(arp + 1);
+
+       ether_addr_copy(arp_ptr, skb_pkt->data + ETH_ALEN);
+       arp_ptr += netdev->addr_len;
+       memcpy(arp_ptr, &iphdr->saddr, 4);
+       arp_ptr += 4;
+       ether_addr_copy(arp_ptr, skb_pkt->data);
+       arp_ptr += netdev->addr_len;
+       memcpy(arp_ptr, &iphdr->daddr, 4);
+
+       skb_reset_network_header(skb);
+       skb->dev = netdev;
+       skb->protocol = htons(ETH_P_ARP);
+       priv->counters[XVE_TX_MCAST_ARP_QUERY]++;
+       return skb;
+}
+
+/**
+ * Function:   xve_create_ndp()
+ * Param:      priv - private structure
+ *             skb_pkt - skb buff from stack
+ * Description: generates an NDP (the IPv6 counterpart of ARP) packet.
+ *             This function generates a Neighbor Solicitation
+ *             packet to discover the link layer address of
+ *             an on-link ipv6 node or to confirm the previously
+ *             determined link layer address.
+ *
+ *     The NDP packet constructed follows the packet format as:
+ *     Ethernet Header
+ *-----------------------------
+ *                     - destination mac               6 bytes
+ *                     - source mac                    6 bytes
+ *                     - type ipv6 (0x86dd)            2 bytes
+ *     IPV6 Header
+ *-----------------------------
+ *                     - version                       4 bits
+ *                     - traffic class                 8 bits
+ *                     - flow label                    20 bits
+ *                     - payload length                2 bytes
+ *                     - next header                   1 byte
+ *                     - hop limit                     1 byte
+ *                     - source ip addr                16 bytes
+ *                     - destination ip addr           16 bytes
+ *     ICMPv6 Header
+ *----------------------------
+ *                     - type                          1 byte
+ *                     - code                          1 byte
+ *                     - checksum                      2 bytes
+ *                     - reserved                      4 bytes
+ *                     - target ip addr                16 bytes
+ *     ICMPv6 Optional Header
+ *----------------------------
+ *                     - type                          1 byte
+ *                     - length                        1 byte
+ *                     - source mac addr               6 bytes
+ *-------------------------------------------------------------
+ * TOTAL                                               86 bytes
+ */
+
+struct sk_buff *xve_create_ndp(struct xve_dev_priv *priv,
+                              struct sk_buff *skb_pkt)
+{
+       struct sk_buff *skb;
+       struct net_device *netdev = priv->netdev;
+       struct ipv6hdr *ipv6_hdr, *ipv6_hdr_tmp;
+       struct icmp6_ndp *icmp_ndp_hdr;
+       unsigned char *hdr_ptr;
+       unsigned char source_addr[16];
+       unsigned char dest_addr[16];
+       int count;              /* keep track of skb_pkt->data */
+
+       count = 0;
+       skb = alloc_skb(XVE_IPV6_MIN_PACK_LEN, GFP_ATOMIC);
+       if (skb == NULL)
+               return NULL;
+
+       /* get the ipv6hdr from skb_pkt */
+       if (xg_vlan_tx_tag_present(skb_pkt))
+               ipv6_hdr_tmp =
+                   (struct ipv6hdr *)(skb_pkt->data + ETH_HDR_LEN + VLAN_HLEN);
+       else
+               ipv6_hdr_tmp = (struct ipv6hdr *)(skb_pkt->data + ETH_HDR_LEN);
+
+       /* get local copy of source and destination ip address */
+       memcpy(source_addr, ipv6_hdr_tmp->saddr.s6_addr, IPV6_ADDR_LEN);
+       memcpy(dest_addr, ipv6_hdr_tmp->daddr.s6_addr, IPV6_ADDR_LEN);
+
+       /* initialise the memory allocated */
+       memset(skb->data, 0, XVE_IPV6_MIN_PACK_LEN);
+       /* create space for data in skb buffer */
+       hdr_ptr = (unsigned char *)skb_put(skb, XVE_IPV6_MIN_PACK_LEN);
+
+       /* construct destination mac address (multicast address) */
+       hdr_ptr[0] = PREFIX_MULTI_ADDR;
+       hdr_ptr[1] = PREFIX_MULTI_ADDR;
+       /* get the last 4 bytes from ipv6 destination ip address */
+       memcpy(hdr_ptr + 2, &(dest_addr[IPV6_ADDR_LEN - 4]), ETH_ALEN - 2);
+
+       hdr_ptr += ETH_ALEN;
+       count += ETH_ALEN;
+
+       /* copy the source MAC */
+       memcpy(hdr_ptr, skb_pkt->data + ETH_ALEN, ETH_ALEN);
+       hdr_ptr += ETH_ALEN;
+       count += ETH_ALEN;
+
+       if (xg_vlan_tx_tag_present(skb_pkt)) {
+               u16 vlan_tci = 0;
+               struct vlan_ethhdr *veth;
+
+               vlan_get_tag(skb_pkt, &vlan_tci);
+               veth = (struct vlan_ethhdr *)(skb->data);
+               veth->h_vlan_proto = htons(ETH_P_8021Q);
+               /* now, the TCI */
+               veth->h_vlan_TCI = htons(vlan_tci);
+               hdr_ptr += VLAN_HLEN;
+               priv->counters[XVE_TX_MCAST_NDP_VLAN_QUERY]++;
+       }
+
+       *hdr_ptr++ = (ETH_P_IPV6 >> 8) & 0xff;
+       count++;
+       *hdr_ptr++ = ETH_P_IPV6 & 0xff;
+       count++;
+
+       /* get the header pointer to populate with ipv6 header */
+       ipv6_hdr = (struct ipv6hdr *)hdr_ptr;
+
+       /* construct ipv6 header */
+       ipv6_hdr->priority = ipv6_hdr_tmp->priority;
+       ipv6_hdr->version = ipv6_hdr_tmp->version;
+       memcpy(ipv6_hdr->flow_lbl, ipv6_hdr_tmp->flow_lbl, 3);
+       ipv6_hdr->payload_len = PAYLOAD_LEN;
+       ipv6_hdr->nexthdr = NEXTHDR_ICMP;
+       ipv6_hdr->hop_limit = ipv6_hdr_tmp->hop_limit;
+       /* get the ipv6 source ip address */
+       memcpy(ipv6_hdr->saddr.s6_addr, source_addr, IPV6_ADDR_LEN);
+       /* construct the multicast dest. ip addr. Solicited Node address */
+       memcpy(&(ipv6_dmac_addr[13]), &(dest_addr[13]), 3);
+       /* get the ipv6 destination ip address */
+       memcpy(ipv6_hdr->daddr.s6_addr, ipv6_dmac_addr, IPV6_ADDR_LEN);
+
+       /* update the header pointer */
+       hdr_ptr += IPV6_HDR_LEN;
+       /* get the header pointer to populate with icmp header */
+       icmp_ndp_hdr = (struct icmp6_ndp *)hdr_ptr;
+
+       /* initialize with ICMP-NDP type */
+       icmp_ndp_hdr->icmp6_type = ICMP_NDP_TYPE;
+
+       /* initialize with ICMP-NDP code */
+       icmp_ndp_hdr->icmp6_code = ICMP_CODE;
+
+       /* get the destination addr from ipv6 header for
+        * ICMP-NDP destination addr */
+       memcpy(&(icmp_ndp_hdr->icmp6_daddr), dest_addr, IPV6_ADDR_LEN);
+
+       /* update icmp header with the optional header */
+       icmp_ndp_hdr->icmp6_option_type = ICMP_OPTION_TYPE;
+       icmp_ndp_hdr->icmp6_option_len = ICMP_OPTION_LEN;
+       /* get the source mac address */
+       memcpy(&(icmp_ndp_hdr->icmp6_option_saddr), skb_pkt->data + ETH_ALEN,
+              ETH_ALEN);
+
+       /* calculate the checksum and update the ICMP-NDP header */
+       icmp_ndp_hdr->icmp6_cksum =
+           csum_ipv6_magic((struct in6_addr *)ipv6_hdr->saddr.s6_addr,
+                           (struct in6_addr *)ipv6_hdr->daddr.s6_addr,
+                           PAYLOAD_LEN, IPPROTO_ICMPV6,
+                           csum_partial(icmp_ndp_hdr, PAYLOAD_LEN, 0));
+
+       skb_reset_network_header(skb);
+       skb->dev = netdev;
+       skb->protocol = htons(ETH_P_IPV6);
+       priv->counters[XVE_TX_MCAST_NDP_QUERY]++;
+       return skb;
+}
+
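+/*
+ * Send a heartbeat: a minimal RARP reply addressed to the interface's
+ * own MAC, pushed through the normal xve_start_xmit() path.
+ */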
+int xve_send_hbeat(struct xve_dev_priv *priv)
+{
+       struct sk_buff *skb;
+       struct arphdr *arp;
+       unsigned char *arp_ptr, *eth_ptr;
+       int ret;
+
+       if (!xve_hbeat_enable)
+               return 0;
+       skb = alloc_skb(XVE_MIN_PACKET_LEN, GFP_ATOMIC);
+       if (skb == NULL) {
+               priv->counters[XVE_HBEAT_ERR_COUNTER]++;
+               return -ENOMEM;
+       }
+       priv->counters[XVE_DATA_HBEAT_COUNTER]++;
+
+       eth_ptr = (unsigned char *)skb_put(skb, XVE_MIN_PACKET_LEN);
+       ether_addr_copy(eth_ptr, priv->netdev->dev_addr);
+       eth_ptr += ETH_ALEN;
+       ether_addr_copy(eth_ptr, priv->netdev->dev_addr);
+       eth_ptr += ETH_ALEN;
+       *eth_ptr++ = (ETH_P_RARP >> 8) & 0xff;
+       *eth_ptr++ = ETH_P_RARP & 0xff;
+
+       arp = (struct arphdr *)eth_ptr;
+       arp->ar_hrd = htons(priv->netdev->type);
+       arp->ar_hln = priv->netdev->addr_len;
+       arp->ar_pln = 4;
+       arp->ar_op = htons(ARPOP_RREPLY);
+
+       arp_ptr = (unsigned char *)(arp + 1);
+
+       ether_addr_copy(arp_ptr, priv->netdev->dev_addr);
+       arp_ptr += priv->netdev->addr_len;
+       arp_ptr += 4;
+       ether_addr_copy(arp_ptr, priv->netdev->dev_addr);
+
+       skb_reset_network_header(skb);
+       skb->dev = priv->netdev;
+       skb->protocol = htons(ETH_P_RARP);
+
+       ret = xve_start_xmit(skb, priv->netdev);
+       if (ret)
+               dev_kfree_skb_any(skb);
+
+       return 0;
+}
+
+static int xve_xsmp_send_msg(xsmp_cookie_t xsmp_hndl, void *data, int length)
+{
+       struct xsmp_message_header *m_header = data;
+       int ret;
+
+       m_header->length = cpu_to_be16(m_header->length);
+       ret = xcpm_send_message(xsmp_hndl, xve_xsmp_service_id, data, length);
+       if (ret)
+               xcpm_free_msg(data);
+       return ret;
+}
+
+static int xve_xsmp_send_notification(struct xve_dev_priv *priv, u64 vid,
+                                     int notifycmd)
+{
+       xsmp_cookie_t xsmp_hndl = priv->xsmp_hndl;
+       int length = sizeof(struct xsmp_message_header) +
+           sizeof(struct xve_xsmp_msg);
+       void *msg;
+       struct xsmp_message_header *header;
+       struct xve_xsmp_msg *xsmp_msg;
+
+       msg = xcpm_alloc_msg(length);
+       if (!msg)
+               return -ENOMEM;
+
+       memset(msg, 0, length);
+
+       header = (struct xsmp_message_header *)msg;
+       xsmp_msg = (struct xve_xsmp_msg *)(msg + sizeof(*header));
+
+       if (notifycmd == XSMP_XVE_OPER_UP) {
+               pr_info("XVE: %s sending updated mtu for %s[mtu %d]\n",
+                       __func__, priv->xve_name, priv->admin_mtu);
+               xsmp_msg->vn_mtu = cpu_to_be16(priv->admin_mtu);
+               xsmp_msg->net_id = cpu_to_be32(priv->net_id);
+       }
+
+       header->type = XSMP_MESSAGE_TYPE_XVE;
+       header->length = length;
+
+       xsmp_msg->type = notifycmd;
+       xsmp_msg->length = cpu_to_be16(sizeof(*xsmp_msg));
+       xsmp_msg->resource_id = cpu_to_be64(vid);
+
+       return xve_xsmp_send_msg(xsmp_hndl, msg, length);
+}
+
+static void handle_action_flags(struct xve_dev_priv *priv)
+{
+       if (test_bit(XVE_TRIGGER_NAPI_SCHED, &priv->state)) {
+               xve_data_recv_handler(priv);
+               clear_bit(XVE_TRIGGER_NAPI_SCHED, &priv->state);
+       }
+}
+
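+/*
+ * Periodic state machine, run from sm_work. It tears the interface
+ * down when the admin/link state requires it; otherwise it brings IB
+ * up, replays deferred events, and sends a heartbeat if
+ * send_hbeat_flag survived the previous interval.
+ */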
+static int xve_state_machine(struct xve_dev_priv *priv)
+{
+       priv->counters[XVE_STATE_MACHINE]++;
+
+       if (!test_bit(XVE_OS_ADMIN_UP, &priv->state) ||
+           !test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state) ||
+           test_bit(XVE_IBLINK_DOWN, &priv->state) ||
+           test_bit(XVE_DELETING, &priv->state)) {
+               priv->counters[XVE_STATE_MACHINE_DOWN]++;
+               xve_io_disconnect(priv);
+               if (test_bit(XVE_SEND_ADMIN_STATE, &priv->state)) {
+                       clear_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+                       xve_xsmp_send_notification(priv,
+                                                  priv->resource_id,
+                                                  XSMP_XVE_UPDATE);
+               }
+               priv->sm_delay = 2000;
+               goto out;
+       }
+
+       if (test_bit(XVE_OPER_UP, &priv->state) &&
+           test_bit(XVE_OS_ADMIN_UP, &priv->state) &&
+           !test_bit(XVE_DELETING, &priv->state)) {
+               priv->counters[XVE_STATE_MACHINE_UP]++;
+               if (!test_bit(XVE_OPER_REP_SENT, &priv->state))
+                       (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
+                                                      priv->resource_id);
+
+               /* Bring IB up (start mcast) */
+               if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags))
+                       xve_ib_dev_up(priv->netdev);
+
+               /* Clear outstanding IB event */
+               if (test_and_clear_bit(XVE_FLAG_IB_EVENT, &priv->flags)) {
+                       xve_debug(DEBUG_MCAST_INFO, priv,
+                                 "%s Clear pending IB work [xve %s]\n",
+                                 __func__, priv->xve_name);
+                       xve_queue_work(priv, XVE_WQ_START_MCASTRESTART);
+               }
+
+               handle_action_flags(priv);
+
+               if (priv->send_hbeat_flag) {
+                       poll_tx(priv);
+                       xve_send_hbeat(priv);
+               }
+               priv->send_hbeat_flag = 1;
+       }
+
+out:
+       return 0;
+}
+
+void queue_age_work(struct xve_dev_priv *priv, int msecs)
+{
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (!test_bit(XVE_DELETING, &priv->state) &&
+           test_bit(XVE_OS_ADMIN_UP, &priv->state))
+               xve_queue_dwork(priv, XVE_WQ_START_FWT_AGING,
+                               msecs_to_jiffies(msecs));
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+void queue_sm_work(struct xve_dev_priv *priv, int msecs)
+{
+       int del = 0;
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (!test_bit(XVE_DELETING, &priv->state))
+               queue_delayed_work(xve_workqueue, &priv->sm_work,
+                                  msecs_to_jiffies(msecs));
+       else
+               del = 1;
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       if (del)
+               xve_remove_one(priv);
+}
+
+void xve_start_aging_work(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_FWT_AGING, 1);
+
+       mutex_lock(&priv->mutex);
+       xve_aging_task_machine(priv);
+       mutex_unlock(&priv->mutex);
+
+       if (priv->aging_delay != 0)
+               queue_age_work(priv, 30 * HZ);
+       xve_put_ctx(priv);
+}
+
+void xve_state_machine_work(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           container_of(work, struct xve_dev_priv, sm_work.work);
+
+       mutex_lock(&priv->mutex);
+       xve_state_machine(priv);
+       mutex_unlock(&priv->mutex);
+
+       queue_sm_work(priv, priv->sm_delay);
+}
+
+static void xve_setup(struct net_device *netdev)
+{
+       struct xve_dev_priv *priv = netdev_priv(netdev);
+
+       ether_setup(netdev);
+       priv->netdev = netdev;
+}
+
+static void xve_set_netdev(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       spin_lock_init(&priv->lock);
+       mutex_init(&priv->mutex);
+
+       INIT_LIST_HEAD(&priv->path_list);
+       INIT_LIST_HEAD(&priv->dead_ahs);
+       INIT_LIST_HEAD(&priv->multicast_list);
+       INIT_DELAYED_WORK(&priv->sm_work, xve_state_machine_work);
+       INIT_DELAYED_WORK(&priv->mcast_leave_task, xve_mcast_leave_task);
+       INIT_DELAYED_WORK(&priv->mcast_join_task, xve_mcast_join_task);
+       INIT_DELAYED_WORK(&priv->stale_task, xve_cm_stale_task);
+}
+
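+/*
+ * One-time net_device feature setup driven by the chassis-supplied
+ * vnet_mode: RC (connected) mode enables CM and disables SG/TSO, while
+ * UD (datagram) mode caps the MTU at the port's IB MTU. Also queries
+ * the HCA for its capability flags.
+ */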
+int xve_set_dev_features(struct xve_dev_priv *priv, struct ib_device *hca)
+{
+       struct ib_device_attr *device_attr;
+       int result = -ENOMEM;
+
+       priv->netdev->watchdog_timeo = 1000 * HZ;
+       priv->netdev->tx_queue_len = xve_sendq_size * 2;
+       priv->netdev->features |=
+           NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_IP_CSUM;
+       set_bit(XVE_FLAG_CSUM, &priv->flags);
+
+       if (lro)
+               priv->lro_mode = 1;
+       /* vnet_mode: 1 - RC, 2 - UD */
+       if (priv->vnet_mode == 1) {
+               pr_info("XVE: %s Setting RC mode for %s\n", __func__,
+                       priv->xve_name);
+               strcpy(priv->mode, "connected(RC)");
+               /* Turn off checksum offload If the module parameter is set */
+               /* TBD if the chassis sends a CHECK SUM BIT */
+               if (xve_no_tx_checksum_offload) {
+                       priv->netdev->features &= ~NETIF_F_IP_CSUM;
+                       clear_bit(XVE_FLAG_CSUM, &priv->flags);
+               }
+
+               set_bit(XVE_FLAG_ADMIN_CM, &priv->flags);
+               priv->netdev->features &= ~(NETIF_F_TSO | NETIF_F_SG);
+               priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+               priv->cm_supported = 1;
+       } else {                /* UD */
+               /* MTU will be reset when mcast join happens */
+               strcpy(priv->mode, "datagram(UD)");
+               if (priv->netdev->mtu > XVE_UD_MTU(priv->max_ib_mtu))
+                       priv->netdev->mtu = XVE_UD_MTU(priv->max_ib_mtu);
+               priv->lro_mode = 1;
+               priv->cm_supported = 0;
+       }
+       priv->mcast_mtu = priv->admin_mtu = priv->netdev->mtu;
+
+       if (priv->lro_mode)
+               priv->netdev->features |= NETIF_F_LRO;
+
+       xg_setup_pseudo_device(priv->netdev, hca);
+
+       SET_NETDEV_OPS(priv->netdev, &xve_netdev_ops);
+       xve_set_ethtool_ops(priv->netdev);
+       netif_napi_add(priv->netdev, &priv->napi, xve_poll, napi_weight);
+       if (xve_esx_preregister_setup(priv->netdev))
+               return -EINVAL;
+       xve_lro_setup(priv);
+
+       xve_set_netdev(priv->netdev);
+
+       device_attr = kmalloc(sizeof(*device_attr), GFP_KERNEL);
+
+       if (!device_attr) {
+               pr_warn("%s: allocation of %zu bytes failed\n",
+                       hca->name, sizeof(*device_attr));
+               return result;
+       }
+
+       result = ib_query_device(hca, device_attr);
+       if (result) {
+               pr_warn("%s: ib_query_device failed (ret = %d)\n",
+                       hca->name, result);
+               kfree(device_attr);
+               return result;
+       }
+       priv->hca_caps = device_attr->device_cap_flags;
+
+       kfree(device_attr);
+
+       return 0;
+}
+
+static int xve_xsmp_send_nack(xsmp_cookie_t xsmp_hndl, void *data, int length,
+                             u8 code)
+{
+       void *msg;
+       struct xsmp_message_header *m_header;
+       int total_len = length + sizeof(struct xsmp_message_header);
+       struct xve_xsmp_msg *xsmsgp = (struct xve_xsmp_msg *)data;
+
+       msg = xcpm_alloc_msg(total_len);
+       if (!msg)
+               return -ENOMEM;
+       m_header = (struct xsmp_message_header *)msg;
+       m_header->type = XSMP_MESSAGE_TYPE_XVE;
+       m_header->length = total_len;
+
+       xsmsgp->code = XSMP_XVE_NACK | code;
+       memcpy(msg + sizeof(*m_header), data, length);
+       return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+void xve_remove_one(struct xve_dev_priv *priv)
+{
+       int count = 0;
+
+       pr_info("XVE:%s Removing xve interface %s\n", __func__, priv->xve_name);
+       ib_unregister_event_handler(&priv->event_handler);
+       cancel_delayed_work_sync(&priv->stale_task);
+       rtnl_lock();
+       dev_change_flags(priv->netdev, priv->netdev->flags & ~IFF_UP);
+       rtnl_unlock();
+       vmk_notify_uplink(priv->netdev);
+       unregister_netdev(priv->netdev);
+       pr_info("XVE:%s Unregistered xve interface %s\n", __func__,
+               priv->xve_name);
+       /* Wait for reference count to go zero  */
+       while (atomic_read(&priv->ref_cnt) && xve_continue_unload()) {
+               count++;
+               if (count > 20) {
+                       pr_info("%s: Waiting for refcnt to become", __func__);
+                       pr_info("zero [xve: %s] %d\n",
+                               priv->xve_name, atomic_read(&priv->ref_cnt));
+                       count = 0;
+               }
+               msleep(1000);
+       }
+       xve_dev_cleanup(priv->netdev);
+       if (!test_bit(XVE_SHUTDOWN, &priv->state)) {
+               /* Ideally need to figure out why userspace ACK isn't working */
+               xve_xsmp_send_notification(priv,
+                                          priv->resource_id, XSMP_XVE_DELETE);
+       }
+       mutex_lock(&xve_mutex);
+       list_del(&priv->list);
+       mutex_unlock(&xve_mutex);
+       free_netdev(priv->netdev);
+
+       pr_info("XVE:%s Removed xve interface %s\n", __func__, priv->xve_name);
+}
+
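+/*
+ * Reject an install request if another xve on the same port already
+ * belongs to the same PVI (identical net_id and local GID).
+ */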
+static int xcpm_check_vnic_from_same_pvi(xsmp_cookie_t xsmp_hndl,
+                                        struct xve_xsmp_msg *xmsgp)
+{
+       struct xve_dev_priv *priv;
+       struct xsmp_session_info xsmp_info;
+       union ib_gid local_gid;
+       struct ib_device *hca;
+       u8 port;
+       char gid_buf[64];
+
+       xcpm_get_xsmp_session_info(xsmp_hndl, &xsmp_info);
+       hca = xsmp_info.ib_device;
+       port = xscore_port_num(xsmp_info.port);
+       (void)ib_query_gid(hca, port, 0, &local_gid);
+
+       mutex_lock(&xve_mutex);
+       list_for_each_entry(priv, &xve_dev_list, list) {
+               if (xmsgp->net_id == cpu_to_be32(priv->net_id) &&
+                   memcmp(priv->local_gid.raw, local_gid.raw,
+                          sizeof(local_gid)) == 0) {
+                       mutex_unlock(&xve_mutex);
+                       print_mgid_buf(gid_buf, local_gid.raw);
+                       pr_info("XVE: %s,%s Multiple VNIC on same pvi",
+                               xmsgp->xve_name, priv->xve_name);
+                       pr_info("%d on same port %s NOT allowed\n",
+                               priv->net_id, gid_buf + 8);
+                       return -EEXIST;
+               }
+       }
+       mutex_unlock(&xve_mutex);
+       return 0;
+}
+
+static int xve_check_for_hca(xsmp_cookie_t xsmp_hndl)
+{
+       struct ib_device *hca;
+       struct xsmp_session_info xsmp_info;
+
+       xcpm_get_xsmp_session_info(xsmp_hndl, &xsmp_info);
+       hca = xsmp_info.ib_device;
+
+       /*
+        * Only mlx4 (Connect-X) and sif HCAs are supported; the original
+        * !(A || B) test could never be true, so spell the intent out
+        */
+       if (strncmp(hca->name, "mlx4", 4) != 0 &&
+           strncmp(hca->name, "sif0", 4) != 0)
+               return -EEXIST;
+
+       return 0;
+}
+
+struct xve_dev_priv *xve_get_xve_by_vid(u64 resource_id)
+{
+       struct xve_dev_priv *priv;
+
+       mutex_lock(&xve_mutex);
+       list_for_each_entry(priv, &xve_dev_list, list) {
+               if (priv->resource_id == resource_id) {
+                       mutex_unlock(&xve_mutex);
+                       return priv;
+               }
+       }
+       mutex_unlock(&xve_mutex);
+
+       return NULL;
+}
+
+struct xve_dev_priv *xve_get_xve_by_name(char *xve_name)
+{
+       struct xve_dev_priv *priv;
+
+       mutex_lock(&xve_mutex);
+       list_for_each_entry(priv, &xve_dev_list, list) {
+               if (strcmp(priv->xve_name, xve_name) == 0) {
+                       mutex_unlock(&xve_mutex);
+                       return priv;
+               }
+       }
+       mutex_unlock(&xve_mutex);
+
+       return NULL;
+}
+
+int xve_xsmp_send_oper_state(struct xve_dev_priv *priv, u64 vid, int state)
+{
+       int ret;
+       char *str = state == XSMP_XVE_OPER_UP ? "UP" : "DOWN";
+
+       pr_info("XVE: %s Sending OPER state [%d]  to %s\n",
+               __func__, state, priv->xve_name);
+       if (state == XSMP_XVE_OPER_UP) {
+               set_bit(XVE_OPER_REP_SENT, &priv->state);
+               set_bit(XVE_PORT_LINK_UP, &priv->state);
+       } else {
+               clear_bit(XVE_OPER_REP_SENT, &priv->state);
+               clear_bit(XVE_PORT_LINK_UP, &priv->state);
+       }
+
+       ret = xve_xsmp_send_notification(priv, vid, state);
+       XSMP_INFO("XVE: %s:Oper %s notification  for ", __func__, str);
+       XSMP_INFO("resource_id: 0x%Lx state %d\n", vid, state);
+
+       return ret;
+}
+
+static void xve_set_oper_up_state(struct xve_dev_priv *priv)
+{
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       set_bit(XVE_OPER_UP, &priv->state);
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int handle_admin_state_change(struct xve_dev_priv *priv,
+                                    struct xve_xsmp_msg *xmsgp)
+{
+       if (xmsgp->admin_state) {
+               XSMP_INFO("%s: VNIC %s Admin state up message\n", __func__,
+                         priv->xve_name);
+               if (!test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
+                       priv->counters[XVE_ADMIN_UP_COUNTER]++;
+                       set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+                       set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+                       /*
+                        * We won't have a notification from the XT as in
+                        * VNIC, so set OPER_UP here
+                        */
+                       xve_set_oper_up_state(priv);
+               }
+       } else {                /* Admin Down */
+               XSMP_INFO("%s: VNIC %s Admin state down message\n",
+                         __func__, priv->xve_name);
+               if (test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state)) {
+                       priv->counters[XVE_ADMIN_DOWN_COUNTER]++;
+                       clear_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+                       set_bit(XVE_SEND_ADMIN_STATE, &priv->state);
+               }
+       }
+       return 0;
+}
+
+void xve_xsmp_handle_oper_req(xsmp_cookie_t xsmp_hndl, u64 resource_id)
+{
+       struct xve_dev_priv *priv;
+       unsigned long flags = 0;
+
+       priv = xve_get_xve_by_vid(resource_id);
+       if (!priv)
+               return;
+       spin_lock_irqsave(&priv->lock, flags);
+
+       priv->counters[XVE_OPER_REQ_COUNTER]++;
+       xve_xsmp_send_oper_state(priv, resource_id,
+                                test_bit(XVE_OPER_UP,
+                                         &priv->state) ? XSMP_XVE_OPER_UP :
+                                XSMP_XVE_OPER_DOWN);
+
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int xve_xsmp_send_ack(struct xve_dev_priv *priv,
+                            struct xve_xsmp_msg *xmsgp)
+{
+       void *msg;
+       struct xsmp_message_header *m_header;
+       int total_len = sizeof(*xmsgp) + sizeof(*m_header);
+       xsmp_cookie_t xsmp_hndl = priv->xsmp_hndl;
+
+       msg = xcpm_alloc_msg(total_len);
+       if (!msg)
+               return -ENOMEM;
+       m_header = (struct xsmp_message_header *)msg;
+       m_header->type = XSMP_MESSAGE_TYPE_XVE;
+       m_header->length = total_len;
+
+       xmsgp->code = 0;
+       xmsgp->vn_mtu = cpu_to_be16(priv->admin_mtu);
+       xmsgp->net_id = cpu_to_be32(priv->net_id);
+       pr_info("XVE: %s ACK back with admin mtu ", __func__);
+       pr_info("%d for %s", xmsgp->vn_mtu, priv->xve_name);
+       pr_info("[netid %d ]\n", xmsgp->net_id);
+
+       memcpy(msg + sizeof(*m_header), xmsgp, sizeof(*xmsgp));
+
+       return xve_xsmp_send_msg(xsmp_hndl, msg, total_len);
+}
+
+/*
+ * Handle an install message: create and register a new xve device for
+ * the chassis-supplied parameters, or re-ACK a duplicate request.
+ */
+
+static int xve_xsmp_install(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp,
+                           void *data, int len)
+{
+       struct net_device *netdev;
+       struct xve_dev_priv *priv;
+       char xve_name[XVE_MAX_NAME_SIZE];
+       int ret = 0;
+       int update_state = 0;
+       int result = -ENOMEM;
+       struct ib_device *hca;
+       u8 port;
+       __be32 net_id_be;
+       u8 ecode = 0;
+
+       if (xve_check_for_hca(xsmp_hndl) != 0) {
+               pr_info("Warning !!!!! Unsupported HCA card for xve ");
+               pr_info("interface - %s XSF feature is only ", xmsgp->xve_name);
+               pr_info("supported on Connect-X HCA cards !!!!!!!");
+               ret = -EEXIST;
+               goto dup_error;
+       }
+
+       priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (priv) {
+               /*
+                * Duplicate VID, send ACK, send oper state update
+                */
+               XSMP_ERROR
+                   ("%s: Duplicate XVE install message name: %s, VID=0x%llx\n",
+                    __func__, xmsgp->xve_name,
+                    be64_to_cpu(xmsgp->resource_id));
+               ret = -EEXIST;
+               update_state = 1;
+               priv->xsmp_hndl = xsmp_hndl;
+               goto send_ack;
+       }
+
+       if (xcpm_check_duplicate_names
+           (xsmp_hndl, xmsgp->xve_name, XSMP_MESSAGE_TYPE_VNIC) != 0) {
+               pr_info("%s Duplicate name %s\n", __func__, xmsgp->xve_name);
+               ret = -EEXIST;
+               goto dup_error;
+       }
+
+       if (xcpm_check_vnic_from_same_pvi(xsmp_hndl, xmsgp) != 0) {
+               ret = -EEXIST;
+               goto dup_error;
+       }
+
+       strncpy(xve_name, xmsgp->xve_name, sizeof(xve_name) - 1);
+       xve_name[sizeof(xve_name) - 1] = '\0';
+
+       priv = xve_get_xve_by_name(xve_name);
+       if (priv) {
+               XSMP_ERROR("%s: Duplicate name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->xve_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               ret = -EEXIST;
+               goto dup_error;
+       }
+
+       netdev =
+           alloc_netdev(sizeof(*priv), xve_name, NET_NAME_UNKNOWN, &xve_setup);
+       if (netdev == NULL) {
+               XSMP_ERROR("%s: alloc_netdev error name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->xve_name,
+                          be64_to_cpu(xmsgp->resource_id));
+               ret = -ENOMEM;
+               ecode = XVE_NACK_ALLOCATION_ERROR;
+               goto dup_error;
+       }
+       priv = netdev_priv(netdev);
+
+       pr_info("XVE: %s Installing xve %s - ", __func__, xmsgp->xve_name);
+       pr_info("resource id %llx", be64_to_cpu(xmsgp->resource_id));
+       pr_info("priv DS %p\n", priv);
+
+       xcpm_get_xsmp_session_info(xsmp_hndl, &priv->xsmp_info);
+       hca = priv->xsmp_info.ib_device;
+       port = xscore_port_num(priv->xsmp_info.port);
+       /* Parse PVI parameters */
+       priv->vnet_mode = (xmsgp->vnet_mode);
+       priv->net_id = be32_to_cpu(xmsgp->net_id);
+       priv->netdev->mtu = be16_to_cpu(xmsgp->vn_mtu);
+       priv->resource_id = be64_to_cpu(xmsgp->resource_id);
+       priv->mp_flag = be16_to_cpu(xmsgp->mp_flag);
+       priv->xsmp_hndl = xsmp_hndl;
+       priv->sm_delay = 1000;
+       priv->aging_delay = xve_aging_timeout * HZ;
+       strcpy(priv->xve_name, xmsgp->xve_name);
+       strcpy(priv->proc_name, priv->xve_name);
+       net_id_be = cpu_to_be32(priv->net_id);
+
+       /* Always set chassis ADMIN up by default */
+       set_bit(XVE_CHASSIS_ADMIN_UP, &priv->state);
+
+       if (!ib_query_port(hca, port, &priv->port_attr))
+               priv->max_ib_mtu = ib_mtu_enum_to_int(priv->port_attr.max_mtu);
+       else {
+               pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
+               goto device_init_failed;
+       }
+
+       memcpy(priv->bcast_mgid.raw, bcast_mgid, sizeof(union ib_gid));
+       pr_info("XVE: %s adding vnic %s ", __func__, priv->xve_name);
+       pr_info("net_id %d vnet_mode %d", priv->net_id, priv->vnet_mode);
+       pr_info("port %d net_id_be %d\n", port, net_id_be);
+       memcpy(&priv->bcast_mgid.raw[4], &net_id_be, sizeof(net_id_be));
+
+       result = ib_query_pkey(hca, port, 0, &priv->pkey);
+       if (result) {
+               pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+                       hca->name, port, result);
+               goto device_init_failed;
+       }
+
+       if (xve_set_dev_features(priv, hca))
+               goto device_init_failed;
+       /*
+        * Set the full membership bit, so that we join the right
+        * broadcast group, etc.
+        */
+       priv->pkey |= 0x8000;
+
+       result = ib_query_gid(hca, port, 0, &priv->local_gid);
+
+       if (result) {
+               pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
+                       hca->name, port, result);
+               goto device_init_failed;
+       } else {
+               u64 m;
+
+               m = xmsgp->mac_high;
+               m = m << 32 | xmsgp->mac_low;
+               m = be64_to_cpu(m);
+               memcpy(priv->netdev->dev_addr, (u8 *) (&m) + 2, ETH_ALEN);
+               priv->mac = m << 32 | xmsgp->mac_low;
+       }
+
+       result = xve_dev_init(priv->netdev, hca, port);
+       if (result != 0) {
+               pr_warn
+                   ("%s: failed to initialize port %d net_id %d (ret = %d)\n",
+                    hca->name, port, priv->net_id, result);
+               goto device_init_failed;
+       }
+
+       INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, xve_event);
+       result = ib_register_event_handler(&priv->event_handler);
+       if (result < 0) {
+               pr_warn("%s: ib_register_event_handler failed for ", hca->name);
+               pr_warn("port %d net_id %d (ret = %d)\n",
+                       port, priv->net_id, result);
+               goto event_failed;
+       }
+
+       xve_fwt_init(&priv->xve_fwt);
+
+       if (xve_add_proc_entry(priv)) {
+               pr_err("XVE; %s procfs error name: %s, VID=0x%llx\n",
+                      __func__, priv->xve_name,
+                      be64_to_cpu(xmsgp->resource_id));
+               goto proc_error;
+       }
+
+       result = register_netdev(priv->netdev);
+       if (result) {
+               pr_warn("%s: couldn't register xve %d net_id %d; error %d\n",
+                       hca->name, port, priv->net_id, result);
+               goto register_failed;
+       }
+
+       handle_carrier_state(priv, 0);
+       if (xve_esx_postregister_setup(priv->netdev)) {
+               ecode = XVE_NACK_ALLOCATION_ERROR;
+               goto sysfs_failed;
+       }
+
+       mutex_lock(&xve_mutex);
+       list_add_tail(&priv->list, &xve_dev_list);
+       mutex_unlock(&xve_mutex);
+
+       xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+
+       queue_sm_work(priv, 0);
+
+       pr_info("%s Successfully created xve [%s]\n", __func__,
+               xmsgp->xve_name);
+
+send_ack:
+       ret = xve_xsmp_send_ack(priv, xmsgp);
+       if (ret) {
+               XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s, VID=0x%llx\n",
+                          __func__, xmsgp->xve_name,
+                          be64_to_cpu(xmsgp->resource_id));
+       }
+       if (update_state) {
+               pr_info("XVE: %s Sending Oper state to chassis for %s id %llx\n",
+                       __func__, priv->xve_name, priv->resource_id);
+               (void)xve_xsmp_handle_oper_req(priv->xsmp_hndl,
+                                              priv->resource_id);
+       }
+
+       return 0;
+
+sysfs_failed:
+       unregister_netdev(priv->netdev);
+register_failed:
+proc_error:
+       ib_unregister_event_handler(&priv->event_handler);
+event_failed:
+       xve_dev_cleanup(priv->netdev);
+device_init_failed:
+       free_netdev(priv->netdev);
+dup_error:
+       (void)xve_xsmp_send_nack(xsmp_hndl, xmsgp, sizeof(*xmsgp), ecode);
+       return ret;
+}
+
+static void xve_send_msg_to_xsigod(xsmp_cookie_t xsmp_hndl, void *data, int len)
+{
+       void *tmsg;
+
+       tmsg = xcpm_alloc_msg(len);
+       if (!tmsg)
+               return;
+       memcpy(tmsg, data, len);
+       if (xcpm_send_msg_xsigod(xsmp_hndl, tmsg, len))
+               xcpm_free_msg(tmsg);
+}
+
+static void xve_handle_ip_req(xsmp_cookie_t xsmp_hndl, u8 *data, int len)
+{
+       struct xve_xsmp_vlanip_msg *msgp =
+           (struct xve_xsmp_vlanip_msg *)(data +
+                                          sizeof(struct xsmp_message_header));
+       struct xve_dev_priv *priv;
+
+       priv = xve_get_xve_by_vid(be64_to_cpu(msgp->resource_id));
+       if (!priv) {
+               xve_counters[XVE_VNIC_DEL_NOVID_COUNTER]++;
+               return;
+       }
+       XSMP_INFO("%s:XSMP message type VLAN IP for %s\n", __func__,
+                 priv->xve_name);
+       strcpy(msgp->ifname, priv->xve_name);
+       msgp->mp_flag = cpu_to_be16(priv->mp_flag);
+
+       /*
+        * Punt this message to userspace
+        */
+       xve_send_msg_to_xsigod(xsmp_hndl, data, len);
+}
+
+static void xve_xsmp_send_stats(xsmp_cookie_t xsmp_hndl, u8 *data, int length)
+{
+       struct xve_dev_priv *priv;
+       struct xve_xsmp_stats_msg *msgp =
+           (struct xve_xsmp_stats_msg *)(data +
+                                         sizeof(struct xsmp_message_header));
+
+       void *msg;
+       struct xsmp_message_header *m_header;
+
+       priv = xve_get_xve_by_vid(be64_to_cpu(msgp->resource_id));
+       if (!priv) {
+               xve_test("XVE: %s priv not found for %llx\n",
+                        __func__, be64_to_cpu(msgp->resource_id));
+               return;
+       }
+
+       msg = xcpm_alloc_msg(length);
+       if (!msg)
+               return;
+       m_header = (struct xsmp_message_header *)msg;
+       m_header->type = XSMP_MESSAGE_TYPE_XVE;
+       m_header->length = length;
+
+       /* Clear stats */
+       if (msgp->bitmask == 0)
+               memset(&priv->stats, 0, sizeof(struct net_device_stats));
+       msgp->rx_packets = priv->stats.rx_packets;
+       msgp->rx_bytes = priv->stats.rx_bytes;
+       msgp->rx_errors = priv->stats.rx_errors;
+       msgp->rx_drops = priv->stats.rx_dropped;
+
+       msgp->tx_packets = priv->stats.tx_packets;
+       msgp->tx_bytes = priv->stats.tx_bytes;
+       msgp->tx_errors = priv->stats.tx_errors;
+       msgp->tx_drops = priv->stats.tx_dropped;
+
+       memcpy(msg + sizeof(*m_header), msgp, sizeof(*msgp));
+       xve_xsmp_send_msg(priv->xsmp_hndl, msg, length);
+}
+
+static int xve_xsmp_update(xsmp_cookie_t xsmp_hndl, struct xve_xsmp_msg *xmsgp)
+{
+       u32 bitmask = be32_to_cpu(xmsgp->bitmask);
+       struct xve_dev_priv *xvep;
+       int ret = 0;
+       int send_ack = 1;
+
+       xvep = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!xvep) {
+               XSMP_ERROR("%s: request for invalid vid: 0x%llx\n",
+                          __func__, be64_to_cpu(xmsgp->resource_id));
+               return -EINVAL;
+       }
+
+       XSMP_INFO("%s: VNIC: %s bit mask: 0x%x\n", __func__, xvep->xve_name,
+                 bitmask);
+
+       mutex_lock(&xvep->mutex);
+
+       if (bitmask & XVE_UPDATE_ADMIN_STATE) {
+               ret = handle_admin_state_change(xvep, xmsgp);
+               /*
+                * Ack will be sent once QP's are brought down
+                */
+               send_ack = 0;
+       }
+
+       if (send_ack) {
+               ret = xve_xsmp_send_ack(xvep, xmsgp);
+               if (ret)
+                       XSMP_ERROR("%s: xve_xsmp_send_ack error name: %s\n"
+                                  "VID=0x%llx\n", __func__, xmsgp->xve_name,
+                                  be64_to_cpu(xmsgp->resource_id));
+       }
+       mutex_unlock(&xvep->mutex);
+
+       return ret;
+}
+
+/*
+ * Set the DELETING bit and let the sm_work thread handle the delete
+ */
+static void xve_handle_del_message(xsmp_cookie_t xsmp_hndl,
+                                  struct xve_xsmp_msg *xmsgp)
+{
+       struct xve_dev_priv *priv;
+       unsigned long flags;
+
+       priv = xve_get_xve_by_vid(be64_to_cpu(xmsgp->resource_id));
+       if (!priv) {
+               XSMP_INFO("XVE: %s priv not found for %s\n",
+                         __func__, xmsgp->xve_name);
+               return;
+       }
+       spin_lock_irqsave(&priv->lock, flags);
+       set_bit(XVE_DELETING, &priv->state);
+       spin_unlock_irqrestore(&priv->lock, flags);
+}
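+
+#if 0
+/*
+ * Consumer side of the XVE_DELETING bit set above -- a minimal sketch,
+ * assuming a hypothetical xve_remove_one() teardown helper: the
+ * state-machine worker polls the bit from thread context, where
+ * unregister_netdev() and QP teardown are allowed to sleep.
+ */
+static void xve_sm_check_delete(struct xve_dev_priv *priv)
+{
+       if (test_bit(XVE_DELETING, &priv->state))
+               xve_remove_one(priv);   /* hypothetical teardown helper */
+}
+#endif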
+
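+/*
+ * Validate and dispatch one XSMP control message; runs on xve_workqueue
+ * via handle_xve_xsmp_messages_work() below.
+ */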
+static void handle_xve_xsmp_messages(xsmp_cookie_t xsmp_hndl, u8 *data,
+                                    int length)
+{
+       int hlen;
+       struct xsmp_message_header *header = (struct xsmp_message_header *)data;
+       struct xve_xsmp_msg *xmsgp =
+           (struct xve_xsmp_msg *)(data + sizeof(*header));
+
+       if (length < sizeof(*header))
+               return;
+       hlen = be16_to_cpu(header->length);
+       if (hlen > length)
+               return;
+       if (header->type != XSMP_MESSAGE_TYPE_XVE)
+               return;
+       XSMP_INFO("%s: XSMP message type: %d\n", __func__, xmsgp->type);
+
+       switch (xmsgp->type) {
+       case XSMP_XVE_VLANIP:
+               xve_handle_ip_req(xsmp_hndl, data, length);
+               break;
+       case XSMP_XVE_INFO_REQUEST:
+               break;
+       case XSMP_XVE_INSTALL:
+               xve_counters[XVE_VNIC_INSTALL_COUNTER]++;
+               xve_xsmp_install(xsmp_hndl, xmsgp, data, length);
+               break;
+       case XSMP_XVE_DELETE:
+               xve_counters[XVE_VNIC_DEL_COUNTER]++;
+               xve_handle_del_message(xsmp_hndl, xmsgp);
+               break;
+       case XSMP_XVE_UPDATE:
+               xve_counters[XVE_VNIC_UPDATE_COUNTER]++;
+               xve_xsmp_update(xsmp_hndl, xmsgp);
+               break;
+       case XSMP_XVE_OPER_REQ:
+               xve_counters[XVE_VNIC_OPER_REQ_COUNTER]++;
+               (void)xve_xsmp_handle_oper_req(xsmp_hndl,
+                                              be64_to_cpu(xmsgp->resource_id));
+               break;
+       case XSMP_XVE_STATS:
+               xve_counters[XVE_VNIC_STATS_COUNTER]++;
+               xve_xsmp_send_stats(xsmp_hndl, data, length);
+               break;
+       default:
+               break;
+       }
+}
+
+static void handle_xve_xsmp_messages_work(struct work_struct *work)
+{
+       struct xve_work *xwork = container_of(work, struct xve_work,
+                                             work);
+
+       handle_xve_xsmp_messages(xwork->xsmp_hndl, xwork->msg, xwork->len);
+       kfree(xwork->msg);
+       kfree(xwork);
+}
+
+/*
+ * Called from thread context
+ */
+static void xve_receive_handler(xsmp_cookie_t xsmp_hndl, u8 *msg, int length)
+{
+       struct xve_work *work;
+       unsigned long flags = 0;
+
+       work = kmalloc(sizeof(*work), GFP_KERNEL);
+       if (!work) {
+               kfree(msg);
+               return;
+       }
+       INIT_WORK(&work->work, handle_xve_xsmp_messages_work);
+       work->xsmp_hndl = xsmp_hndl;
+       work->msg = msg;
+       work->len = length;
+
+       spin_lock_irqsave(&xve_lock, flags);
+       queue_work(xve_workqueue, &work->work);
+       spin_unlock_irqrestore(&xve_lock, flags);
+}
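+
+#if 0
+/*
+ * Layout assumed for the deferred-work wrapper queued above (the real
+ * struct xve_work lives in a driver header; sketched here for
+ * reference).  Note that the work handler owns and frees @msg.
+ */
+struct xve_work_sketch {
+       struct work_struct work;        /* queued on xve_workqueue */
+       xsmp_cookie_t xsmp_hndl;        /* originating XSMP session */
+       u8 *msg;                        /* freed by the work handler */
+       int len;
+};
+#endif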
+
+/*
+ * Needs to be called with mutex lock held
+ */
+static void xve_wait_for_removal(xsmp_cookie_t xsmp_hndl)
+{
+       int is_pres;
+       struct xve_dev_priv *priv;
+
+       while (1) {
+               is_pres = 0;
+               list_for_each_entry(priv, &xve_dev_list, list) {
+                       if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl))
+                               is_pres = 1;
+               }
+               if (is_pres) {
+                       mutex_unlock(&xve_mutex);
+                       msleep(100);
+                       mutex_lock(&xve_mutex);
+               } else
+                       break;
+       }
+}
+
+static void xve_xsmp_event_handler(xsmp_cookie_t xsmp_hndl, int event)
+{
+       struct xve_dev_priv *priv;
+       unsigned long flags;
+
+       mutex_lock(&xve_mutex);
+
+       switch (event) {
+       case XSCORE_PORT_UP:
+       case XSCORE_PORT_DOWN:
+               list_for_each_entry(priv, &xve_dev_list, list) {
+                       if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl)) {
+                               if (event == XSCORE_PORT_DOWN) {
+                                       set_bit(XVE_IBLINK_DOWN, &priv->state);
+                                       priv->counters
+                                           [XVE_IBLINK_DOWN_COUNTER]++;
+                               } else {
+                                       clear_bit(XVE_IBLINK_DOWN,
+                                                 &priv->state);
+                                       xve_set_oper_up_state(priv);
+                                       priv->counters[XVE_IBLINK_UP_COUNTER]++;
+                               }
+                       }
+               }
+               break;
+       case XSCORE_DEVICE_REMOVAL:
+               xve_counters[XVE_DEVICE_REMOVAL_COUNTER]++;
+               list_for_each_entry(priv, &xve_dev_list, list) {
+                       if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl)) {
+                               spin_lock_irqsave(&priv->lock, flags);
+                               set_bit(XVE_DELETING, &priv->state);
+                               spin_unlock_irqrestore(&priv->lock, flags);
+                       }
+               }
+               /*
+                * Now wait for all the vnics to be deleted
+                */
+               xve_wait_for_removal(xsmp_hndl);
+               break;
+       case XSCORE_CONN_CONNECTED:
+               list_for_each_entry(priv, &xve_dev_list, list) {
+                       if (xsmp_sessions_match(&priv->xsmp_info, xsmp_hndl))
+                               priv->xsmp_hndl = xsmp_hndl;
+               }
+               break;
+       default:
+               break;
+       }
+
+       mutex_unlock(&xve_mutex);
+}
+
+static int xve_xsmp_callout_handler(char *name)
+{
+       struct xve_dev_priv *priv;
+       int ret = 0;
+
+       mutex_lock(&xve_mutex);
+       list_for_each_entry(priv, &xve_dev_list, list) {
+               /* CHECK for duplicate name */
+               if (strcmp(priv->xve_name, name) == 0) {
+                       ret = -EINVAL;
+                       break;
+               }
+       }
+       mutex_unlock(&xve_mutex);
+       return ret;
+}
+
+int xve_xsmp_init(void)
+{
+       struct xsmp_service_reg_info service_info = {
+               .receive_handler = xve_receive_handler,
+               .event_handler = xve_xsmp_event_handler,
+               .callout_handler = xve_xsmp_callout_handler,
+               .ctrl_message_type = XSMP_MESSAGE_TYPE_XVE,
+               .resource_flag_index = RESOURCE_FLAG_INDEX_XVE
+       };
+
+       xve_xsmp_service_id = xcpm_register_service(&service_info);
+       if (xve_xsmp_service_id < 0)
+               return xve_xsmp_service_id;
+       return 0;
+}
+
+void xve_xsmp_exit(void)
+{
+       (void)xcpm_unregister_service(xve_xsmp_service_id);
+       xve_xsmp_service_id = -1;
+}
+
+static int __init xve_init_module(void)
+{
+       int ret;
+
+       INIT_LIST_HEAD(&xve_dev_list);
+       spin_lock_init(&xve_lock);
+
+       mutex_init(&xve_mutex);
+
+       xve_recvq_size = roundup_pow_of_two(xve_recvq_size);
+       xve_recvq_size = min(xve_recvq_size, XVE_MAX_QUEUE_SIZE);
+       xve_recvq_size = max(xve_recvq_size, XVE_MIN_QUEUE_SIZE);
+
+       xve_sendq_size = roundup_pow_of_two(xve_sendq_size);
+       xve_sendq_size = min(xve_sendq_size, XVE_MAX_QUEUE_SIZE);
+       xve_sendq_size = max(xve_sendq_size, max(2 * MAX_SEND_CQE,
+                                                XVE_MIN_QUEUE_SIZE));
+       /*
+        * When copying small received packets, we only copy from the
+        * linear data part of the SKB, so we rely on this condition
+        * (see the copybreak sketch after this function).
+        */
+       BUILD_BUG_ON(XVE_CM_COPYBREAK > XVE_CM_HEAD_SIZE);
+
+       ret = xve_create_procfs_root_entries();
+       if (ret)
+               return ret;
+
+       ret = xve_tables_init();
+       if (ret)
+               goto err_fs;
+
+       /*
+        * We create our own workqueue mainly because we want to be
+        * able to flush it when devices are being removed.  We can't
+        * use schedule_work()/flush_scheduled_work() because both
+        * unregister_netdev() and linkwatch_event take the rtnl lock,
+        * so flush_scheduled_work() can deadlock during device
+        * removal.
+        */
+       xve_workqueue = create_singlethread_workqueue("xve");
+       if (!xve_workqueue) {
+               ret = -ENOMEM;
+               goto err_tables;
+       }
+
+       xve_taskqueue = create_singlethread_workqueue("xve_taskq");
+       if (!xve_taskqueue) {
+               ret = -ENOMEM;
+               goto err_wq;
+       }
+
+       ret = xve_xsmp_init();
+       if (ret)
+               goto err_taskq;
+       /*
+        * Now register with IB framework
+        */
+       ib_sa_register_client(&xve_sa_client);
+       return 0;
+
+err_taskq:
+       destroy_workqueue(xve_taskqueue);
+err_wq:
+       destroy_workqueue(xve_workqueue);
+err_tables:
+       xve_tables_exit();
+
+err_fs:
+       xve_remove_procfs_root_entries();
+       return ret;
+}
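+
+#if 0
+/*
+ * Copybreak sketch for the BUILD_BUG_ON() above: packets no larger than
+ * XVE_CM_COPYBREAK are copied out of the large receive buffer into a
+ * freshly allocated small skb, so the receive buffer can be reposted.
+ * Only linear skb data is copied, which is why XVE_CM_COPYBREAK must not
+ * exceed XVE_CM_HEAD_SIZE.  Illustrative only.
+ */
+static struct sk_buff *xve_copybreak(struct sk_buff *skb)
+{
+       struct sk_buff *small_skb;
+
+       if (skb->len > XVE_CM_COPYBREAK)
+               return skb;
+
+       small_skb = dev_alloc_skb(skb->len + NET_IP_ALIGN);
+       if (!small_skb)
+               return skb;
+
+       skb_reserve(small_skb, NET_IP_ALIGN);
+       skb_copy_from_linear_data(skb, skb_put(small_skb, skb->len),
+                                 skb->len);
+       dev_kfree_skb_any(skb);
+       return small_skb;
+}
+#endif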
+
+static void __exit xve_cleanup_module(void)
+{
+       struct xve_dev_priv *priv;
+       unsigned long flags = 0;
+
+       pr_info("XVE: %s Remove module\n", __func__);
+       xve_xsmp_exit();
+
+       mutex_lock(&xve_mutex);
+
+       list_for_each_entry(priv, &xve_dev_list, list) {
+               spin_lock_irqsave(&priv->lock, flags);
+               set_bit(XVE_DELETING, &priv->state);
+               set_bit(XVE_SHUTDOWN, &priv->state);
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
+
+       while (!list_empty(&xve_dev_list)) {
+               mutex_unlock(&xve_mutex);
+               msleep(100);
+               mutex_lock(&xve_mutex);
+       }
+       mutex_unlock(&xve_mutex);
+       ib_sa_unregister_client(&xve_sa_client);
+       xve_tables_exit();
+       mutex_lock(&xve_mutex);
+       flush_workqueue(xve_workqueue);
+       destroy_workqueue(xve_workqueue);
+       flush_workqueue(xve_taskqueue);
+       destroy_workqueue(xve_taskqueue);
+       mutex_unlock(&xve_mutex);
+
+       xve_remove_procfs_root_entries();
+       mutex_destroy(&xve_mutex);
+       pr_info("XVE: %s module remove success\n", __func__);
+}
+
+module_init(xve_init_module);
+module_exit(xve_cleanup_module);
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c b/drivers/infiniband/ulp/xsigo/xve/xve_multicast.c
new file mode 100644 (file)
index 0000000..19ee47a
--- /dev/null
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int rate_selector = IB_SA_EQ;
+module_param(rate_selector, int, 0444);
+MODULE_PARM_DESC(rate_selector, "Multicast rate selector");
+
+static int mcast_rate = IB_RATE_10_GBPS;
+module_param(mcast_rate, int, 0444);
+MODULE_PARM_DESC(mcast_rate, "Multicast rate during join/create");
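+
+/*
+ * Example (illustrative; the numeric values come from enum ib_rate and
+ * enum ib_sa_selector in the IB core headers):
+ *
+ *   modprobe xve mcast_rate=3 rate_selector=2
+ *
+ * requests exactly (IB_SA_EQ == 2) 10 Gb/s (IB_RATE_10_GBPS == 3),
+ * matching the defaults above.
+ */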
+
+static DEFINE_MUTEX(mcast_mutex);
+
+struct xve_mcast_iter {
+       struct net_device *dev;
+       union ib_gid mgid;
+       unsigned long created;
+       unsigned int queuelen;
+       unsigned int complete;
+       unsigned int send_only;
+};
+
+static void xve_mcast_free(struct xve_mcast *mcast)
+{
+       struct net_device *dev = mcast->netdev;
+       int tx_dropped = 0;
+
+       xve_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
+                     mcast->mcmember.mgid.raw);
+
+       if (mcast->ah)
+               xve_put_ah(mcast->ah);
+
+       while (!skb_queue_empty(&mcast->pkt_queue)) {
+               ++tx_dropped;
+               dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+       }
+
+       netif_tx_lock_bh(dev);
+       dev->stats.tx_dropped += tx_dropped;
+       ((struct xve_dev_priv *)netdev_priv(dev))->stats.tx_dropped +=
+           tx_dropped;
+       netif_tx_unlock_bh(dev);
+
+       kfree(mcast);
+}
+
+static struct xve_mcast *xve_mcast_alloc(struct net_device *dev, int can_sleep)
+{
+       struct xve_mcast *mcast;
+
+       mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+       if (!mcast)
+               return NULL;
+
+       mcast->netdev = dev;
+       mcast->created = jiffies;
+       mcast->used = jiffies;
+       mcast->backoff = 1;
+
+       INIT_LIST_HEAD(&mcast->list);
+       skb_queue_head_init(&mcast->pkt_queue);
+
+       return mcast;
+}
+
+static struct xve_mcast *__xve_mcast_find(struct net_device *dev, void *mgid)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct rb_node *n = priv->multicast_tree.rb_node;
+
+       while (n) {
+               struct xve_mcast *mcast;
+               int ret;
+
+               mcast = rb_entry(n, struct xve_mcast, rb_node);
+
+               ret = memcmp(mgid, mcast->mcmember.mgid.raw,
+                            sizeof(union ib_gid));
+               if (ret < 0)
+                       n = n->rb_left;
+               else if (ret > 0)
+                       n = n->rb_right;
+               else
+                       return mcast;
+       }
+
+       return NULL;
+}
+
+static int __xve_mcast_add(struct net_device *dev, struct xve_mcast *mcast)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
+
+       while (*n) {
+               struct xve_mcast *tmcast;
+               int ret;
+
+               pn = *n;
+               tmcast = rb_entry(pn, struct xve_mcast, rb_node);
+
+               ret =
+                   memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
+                          sizeof(union ib_gid));
+               if (ret < 0)
+                       n = &pn->rb_left;
+               else if (ret > 0)
+                       n = &pn->rb_right;
+               else
+                       return -EEXIST;
+       }
+
+       rb_link_node(&mcast->rb_node, pn, n);
+       rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
+
+       return 0;
+}
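+
+#if 0
+/*
+ * Find-or-insert usage of the two rbtree helpers above (a sketch; the
+ * real callers in this file open-code this sequence).  The tree is
+ * keyed by the 16-byte MGID and protected by priv->lock.
+ */
+static struct xve_mcast *xve_mcast_get_or_add(struct net_device *dev,
+                                             union ib_gid *mgid)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_mcast *mcast;
+
+       spin_lock_irq(&priv->lock);
+       mcast = __xve_mcast_find(dev, mgid);
+       if (!mcast) {
+               mcast = xve_mcast_alloc(dev, 0); /* atomic: lock is held */
+               if (mcast) {
+                       memcpy(mcast->mcmember.mgid.raw, mgid->raw,
+                              sizeof(union ib_gid));
+                       __xve_mcast_add(dev, mcast);
+                       list_add_tail(&mcast->list, &priv->multicast_list);
+               }
+       }
+       spin_unlock_irq(&priv->lock);
+       return mcast;
+}
+#endif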
+
+static int xve_mcast_join_finish(struct xve_mcast *mcast,
+                                struct ib_sa_mcmember_rec *mcmember)
+{
+       struct net_device *dev = mcast->netdev;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_ah *ah;
+       int ret;
+       int set_qkey = 0;
+
+       mcast->mcmember = *mcmember;
+
+       /* Set the cached Q_Key before we attach if it's the broadcast group */
+       if (!memcmp(mcast->mcmember.mgid.raw, priv->bcast_mgid.raw,
+                   sizeof(union ib_gid))) {
+               spin_lock_irq(&priv->lock);
+               if (!priv->broadcast) {
+                       spin_unlock_irq(&priv->lock);
+                       return -EAGAIN;
+               }
+               priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
+               spin_unlock_irq(&priv->lock);
+               priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+               set_qkey = 1;
+       }
+
+       if (!test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+               if (test_and_set_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+                       xve_warn(priv,
+                                "multicast group %pI6 already attached\n",
+                                mcast->mcmember.mgid.raw);
+
+                       return 0;
+               }
+
+               ret = xve_mcast_attach(dev, be16_to_cpu(mcast->mcmember.mlid),
+                                      &mcast->mcmember.mgid, set_qkey);
+               if (ret < 0) {
+                       xve_warn(priv,
+                                "couldn't attach QP to multicast group %pI6\n",
+                                mcast->mcmember.mgid.raw);
+
+                       clear_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags);
+                       return ret;
+               }
+       }
+
+       {
+               struct ib_ah_attr av = {
+                       .dlid = be16_to_cpu(mcast->mcmember.mlid),
+                       .port_num = priv->port,
+                       .sl = mcast->mcmember.sl,
+                       .ah_flags = IB_AH_GRH,
+                       .static_rate = mcast->mcmember.rate,
+                       .grh = {
+                               .flow_label =
+                               be32_to_cpu(mcast->mcmember.flow_label),
+                               .hop_limit = mcast->mcmember.hop_limit,
+                               .sgid_index = 0,
+                               .traffic_class = mcast->mcmember.traffic_class}
+               };
+               av.grh.dgid = mcast->mcmember.mgid;
+
+               ah = xve_create_ah(dev, priv->pd, &av);
+               if (!ah) {
+                       xve_warn(priv, "ib_address_create failed\n");
+               } else {
+                       spin_lock_irq(&priv->lock);
+                       mcast->ah = ah;
+                       spin_unlock_irq(&priv->lock);
+
+                       xve_dbg_mcast(priv,
+                                     "MGID %pI6 AV %p, LID 0x%04x, SL %d\n",
+                                     mcast->mcmember.mgid.raw, mcast->ah->ah,
+                                     be16_to_cpu(mcast->mcmember.mlid),
+                                     mcast->mcmember.sl);
+               }
+       }
+
+       /* actually send any queued packets */
+       netif_tx_lock_bh(dev);
+       while (!skb_queue_empty(&mcast->pkt_queue)) {
+               struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
+
+               netif_tx_unlock_bh(dev);
+               skb->dev = dev;
+               if (dev_queue_xmit(skb))
+                       xve_warn(priv,
+                                "dev_queue_xmit failed to requeue packet\n");
+               netif_tx_lock_bh(dev);
+       }
+       netif_tx_unlock_bh(dev);
+
+       return 0;
+}
+
+static int xve_mcast_sendonly_join_complete(int status,
+                                           struct ib_sa_multicast *multicast)
+{
+       struct xve_mcast *mcast = multicast->context;
+       struct net_device *dev = mcast->netdev;
+
+       /* We trap for port events ourselves. */
+       if (status == -ENETRESET)
+               return 0;
+
+       if (!status)
+               status = xve_mcast_join_finish(mcast, &multicast->rec);
+
+       if (status) {
+               if (mcast->logcount++ < 20)
+                       xve_dbg_mcast(netdev_priv(dev),
+                                     "%s multicast join failed for %pI6, status %d\n",
+                                     __func__, mcast->mcmember.mgid.raw,
+                                     status);
+
+               /* Flush out any queued packets */
+               netif_tx_lock_bh(dev);
+               while (!skb_queue_empty(&mcast->pkt_queue)) {
+                       INC_TX_DROP_STATS(((struct xve_dev_priv *)
+                                          netdev_priv(dev)), dev);
+                       dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+               }
+               netif_tx_unlock_bh(dev);
+               /* Clear the busy flag so we try again */
+               status = test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+       }
+       return status;
+}
+
+static int xve_mcast_sendonly_join(struct xve_mcast *mcast)
+{
+       struct net_device *dev = mcast->netdev;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_sa_mcmember_rec rec = {
+               .join_state = 1
+       };
+       ib_sa_comp_mask comp_mask;
+       int ret = 0;
+
+       if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags)) {
+               xve_dbg_mcast(priv,
+                             "device shutting down, no multicast joins\n");
+               return -ENODEV;
+       }
+
+       if (test_and_set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
+               xve_dbg_mcast(priv, "multicast entry busy, skipping\n");
+               return -EBUSY;
+       }
+
+       rec.mgid = mcast->mcmember.mgid;
+       rec.port_gid = priv->local_gid;
+       rec.pkey = cpu_to_be16(priv->pkey);
+
+       comp_mask =
+           IB_SA_MCMEMBER_REC_MGID |
+           IB_SA_MCMEMBER_REC_PORT_GID |
+           IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+       if (priv->broadcast) {
+               comp_mask |=
+                   IB_SA_MCMEMBER_REC_QKEY |
+                   IB_SA_MCMEMBER_REC_MTU_SELECTOR |
+                   IB_SA_MCMEMBER_REC_MTU |
+                   IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
+                   IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+                   IB_SA_MCMEMBER_REC_RATE |
+                   IB_SA_MCMEMBER_REC_SL |
+                   IB_SA_MCMEMBER_REC_FLOW_LABEL |
+                   IB_SA_MCMEMBER_REC_HOP_LIMIT;
+
+               rec.qkey = priv->broadcast->mcmember.qkey;
+               rec.mtu_selector = IB_SA_EQ;
+               rec.mtu = priv->broadcast->mcmember.mtu;
+               rec.traffic_class = priv->broadcast->mcmember.traffic_class;
+               rec.rate_selector = IB_SA_EQ;
+               rec.rate = priv->broadcast->mcmember.rate;
+               rec.sl = priv->broadcast->mcmember.sl;
+               rec.flow_label = priv->broadcast->mcmember.flow_label;
+               rec.hop_limit = priv->broadcast->mcmember.hop_limit;
+       }
+       xve_dbg_mcast(priv, "%s send-only join, mtu %d\n", __func__,
+                     rec.mtu);
+
+       mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca,
+                                        priv->port, &rec,
+                                        comp_mask,
+                                        GFP_ATOMIC,
+                                        xve_mcast_sendonly_join_complete,
+                                        mcast);
+       if (IS_ERR(mcast->mc)) {
+               ret = PTR_ERR(mcast->mc);
+               clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+               xve_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n", ret);
+       } else {
+               xve_dbg_mcast(priv,
+                             "no multicast record for %pI6, starting join\n",
+                             mcast->mcmember.mgid.raw);
+       }
+
+       return ret;
+}
+
+static int xve_mcast_join_complete(int status,
+                                  struct ib_sa_multicast *multicast)
+{
+       struct xve_mcast *mcast = multicast->context;
+       struct net_device *dev = mcast->netdev;
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+                     mcast->mcmember.mgid.raw, status);
+
+       /* We trap for port events ourselves. */
+       if (status == -ENETRESET)
+               return 0;
+
+       if (!status)
+               status = xve_mcast_join_finish(mcast, &multicast->rec);
+
+       if (!status) {
+               mcast->backoff = 1;
+               mutex_lock(&mcast_mutex);
+               if (test_bit(XVE_MCAST_RUN, &priv->flags))
+                       xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+                                               0);
+               mutex_unlock(&mcast_mutex);
+
+               /*
+                * Defer carrier on work to workqueue to avoid a
+                * deadlock on rtnl_lock here.
+                */
+               if (mcast == priv->broadcast)
+                       xve_queue_work(priv, XVE_WQ_START_MCASTON);
+
+               return 0;
+       }
+
+       if (mcast->logcount++ < 20) {
+               if (status == -ETIMEDOUT || status == -EAGAIN) {
+                       xve_dbg_mcast(priv,
+                                     "%s multicast join failed for %pI6, status %d\n",
+                                     __func__, mcast->mcmember.mgid.raw,
+                                     status);
+               } else {
+                       xve_warn(priv,
+                                "%s multicast join failed for %pI6, status %d\n",
+                                __func__, mcast->mcmember.mgid.raw, status);
+               }
+       }
+
+       mcast->backoff *= 2;
+       if (mcast->backoff > XVE_MAX_BACKOFF_SECONDS)
+               mcast->backoff = XVE_MAX_BACKOFF_SECONDS;
+
+       /* Clear the busy flag so we try again */
+       status = test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+
+       mutex_lock(&mcast_mutex);
+       spin_lock_irq(&priv->lock);
+       if (test_bit(XVE_MCAST_RUN, &priv->flags))
+               xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+                                       mcast->backoff * HZ);
+       spin_unlock_irq(&priv->lock);
+       mutex_unlock(&mcast_mutex);
+
+       return status;
+}
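+
+#if 0
+/*
+ * The retry delay above follows capped binary exponential backoff; the
+ * same computation as a self-contained helper (illustrative only):
+ */
+static unsigned long xve_join_backoff_jiffies(struct xve_mcast *mcast)
+{
+       mcast->backoff = min_t(unsigned long, mcast->backoff * 2,
+                              XVE_MAX_BACKOFF_SECONDS);
+       return mcast->backoff * HZ;
+}
+#endif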
+
+static void xve_mcast_join(struct net_device *dev, struct xve_mcast *mcast,
+                          int create)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_sa_mcmember_rec rec = {
+               .join_state = 1
+       };
+       ib_sa_comp_mask comp_mask;
+       int ret = 0;
+
+       rec.mgid = mcast->mcmember.mgid;
+       rec.port_gid = priv->local_gid;
+       rec.pkey = cpu_to_be16(priv->pkey);
+
+       comp_mask =
+           IB_SA_MCMEMBER_REC_MGID |
+           IB_SA_MCMEMBER_REC_PORT_GID |
+           IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE;
+
+       if (create) {
+               comp_mask |=
+                   IB_SA_MCMEMBER_REC_QKEY |
+                   IB_SA_MCMEMBER_REC_TRAFFIC_CLASS |
+                   IB_SA_MCMEMBER_REC_SL |
+                   IB_SA_MCMEMBER_REC_FLOW_LABEL |
+                   IB_SA_MCMEMBER_REC_RATE_SELECTOR |
+                   IB_SA_MCMEMBER_REC_RATE | IB_SA_MCMEMBER_REC_HOP_LIMIT;
+
+               rec.qkey = 0x0;
+               rec.traffic_class = 0x0;
+               rec.sl = 0x0;
+               rec.flow_label = 0x0;
+               rec.hop_limit = 0x0;
+               /*
+                * Create with the configured rate (default: exactly
+                * 10 Gb/s, i.e. IB_SA_EQ + IB_RATE_10_GBPS)
+                */
+               rec.rate_selector = rate_selector;
+               rec.rate = mcast_rate;
+       }
+
+       xve_dbg_mcast(priv, "joining MGID %pI6 pkey %d qkey %d\n",
+                     mcast->mcmember.mgid.raw, rec.pkey, rec.qkey);
+       set_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+       mcast->mc = ib_sa_join_multicast(&xve_sa_client, priv->ca, priv->port,
+                                        &rec, comp_mask, GFP_KERNEL,
+                                        xve_mcast_join_complete, mcast);
+       if (IS_ERR(mcast->mc)) {
+               clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags);
+               ret = PTR_ERR(mcast->mc);
+               xve_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+
+               mcast->backoff *= 2;
+               if (mcast->backoff > XVE_MAX_BACKOFF_SECONDS)
+                       mcast->backoff = XVE_MAX_BACKOFF_SECONDS;
+
+               mutex_lock(&mcast_mutex);
+               if (test_bit(XVE_MCAST_RUN, &priv->flags))
+                       xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN,
+                                               mcast->backoff * HZ);
+               mutex_unlock(&mcast_mutex);
+       }
+}
+
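+/*
+ * Worker for XVE_WQ_START_MCASTJOIN: (re)create and join the broadcast
+ * group first, then walk multicast_list joining one unattached group per
+ * pass (the join completion requeues this work), and finally recompute
+ * mcast_mtu once every group is joined.
+ */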
+void xve_mcast_join_task(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_MCASTJOIN, 2);
+       struct net_device *dev = priv->netdev;
+       struct ib_port_attr attr;
+
+       if (!test_bit(XVE_MCAST_RUN, &priv->flags))
+               return;
+
+       if (!ib_query_port(priv->ca, priv->port, &attr))
+               priv->local_lid = attr.lid;
+       else
+               xve_warn(priv, "ib_query_port failed\n");
+
+       priv->counters[XVE_MCAST_JOIN_TASK]++;
+
+       if (!priv->broadcast) {
+               struct xve_mcast *broadcast;
+
+               if (!test_bit(XVE_FLAG_ADMIN_UP, &priv->flags))
+                       return;
+
+               broadcast = xve_mcast_alloc(dev, 1);
+               if (!broadcast) {
+                       xve_warn(priv, "failed to allocate broadcast group\n");
+                       mutex_lock(&mcast_mutex);
+                       if (test_bit(XVE_MCAST_RUN, &priv->flags))
+                               xve_queue_complete_work(priv,
+                                                       XVE_WQ_START_MCASTJOIN,
+                                                       HZ);
+                       mutex_unlock(&mcast_mutex);
+                       return;
+               }
+
+               spin_lock_irq(&priv->lock);
+               memcpy(broadcast->mcmember.mgid.raw, priv->bcast_mgid.raw,
+                      sizeof(union ib_gid));
+               priv->broadcast = broadcast;
+               __xve_mcast_add(dev, priv->broadcast);
+               spin_unlock_irq(&priv->lock);
+       }
+
+       if (priv->broadcast &&
+           !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
+               if (!test_bit(XVE_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+                       xve_mcast_join(dev, priv->broadcast, 1);
+               return;
+       }
+
+       while (1) {
+               struct xve_mcast *mcast = NULL;
+
+               spin_lock_irq(&priv->lock);
+               list_for_each_entry(mcast, &priv->multicast_list, list) {
+                       if (!test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)
+                           && !test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)
+                           && !test_bit(XVE_MCAST_FLAG_ATTACHED,
+                                        &mcast->flags)) {
+                               /* Found the next unjoined group */
+                               break;
+                       }
+               }
+               spin_unlock_irq(&priv->lock);
+
+               if (&mcast->list == &priv->multicast_list) {
+                       /* All done */
+                       break;
+               }
+
+               xve_mcast_join(dev, mcast, 1);
+               return;
+       }
+
+       spin_lock_irq(&priv->lock);
+       if (priv->broadcast)
+               priv->mcast_mtu =
+                   XVE_UD_MTU(ib_mtu_enum_to_int
+                              (priv->broadcast->mcmember.mtu));
+       else
+               priv->mcast_mtu = priv->admin_mtu;
+       spin_unlock_irq(&priv->lock);
+
+       if (!xve_cm_admin_enabled(dev)) {
+               pr_info("XVE: %s xve %s dev mtu %d, admin_mtu %d, mcast_mtu %d\n",
+                       __func__, priv->xve_name, priv->netdev->mtu,
+                       priv->admin_mtu, priv->mcast_mtu);
+               xve_dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
+       }
+
+       xve_dbg_mcast(priv, "successfully joined all multicast groups\n");
+       clear_bit(XVE_MCAST_RUN, &priv->flags);
+}
+
+int xve_mcast_start_thread(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       /* Don't start mcast if the interface is not up */
+       if (!test_bit(XVE_FLAG_ADMIN_UP, &priv->flags)
+           || !test_bit(XVE_CHASSIS_ADMIN_UP, &priv->state))
+               return -ENOTCONN;
+
+       xve_dbg_mcast(priv, "%s starting mcast thread, state[%ld]\n",
+                     __func__, priv->flags);
+
+       mutex_lock(&mcast_mutex);
+       if (!test_and_set_bit(XVE_MCAST_RUN, &priv->flags))
+               xve_queue_complete_work(priv, XVE_WQ_START_MCASTJOIN, 0);
+
+       if (!test_and_set_bit(XVE_MCAST_RUN_GC, &priv->flags))
+               xve_queue_complete_work(priv, XVE_WQ_START_MCASTLEAVE, 0);
+
+       mutex_unlock(&mcast_mutex);
+
+       return 0;
+}
+
+int xve_mcast_stop_thread(struct net_device *dev, int flush)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+
+       xve_dbg_mcast(priv, "stopping multicast thread\n");
+
+       mutex_lock(&mcast_mutex);
+       clear_bit(XVE_MCAST_RUN, &priv->flags);
+       clear_bit(XVE_MCAST_RUN_GC, &priv->flags);
+       cancel_delayed_work(&priv->mcast_join_task);
+       cancel_delayed_work(&priv->mcast_leave_task);
+       mutex_unlock(&mcast_mutex);
+
+       return 0;
+}
+
+static int xve_mcast_leave(struct net_device *dev, struct xve_mcast *mcast)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret = 0;
+
+       if (test_and_clear_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags))
+               ib_sa_free_multicast(mcast->mc);
+
+       if (test_and_clear_bit(XVE_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+               xve_dbg_mcast(priv, "leaving MGID %pI6\n",
+                             mcast->mcmember.mgid.raw);
+
+               /* Remove ourselves from the multicast group */
+               if (priv->qp &&
+                   !test_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags))
+                       ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
+                                             be16_to_cpu(mcast->mcmember.mlid));
+               if (ret)
+                       xve_warn(priv, "ib_detach_mcast failed (result = %d)\n",
+                                ret);
+       }
+
+       return 0;
+}
+
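+/*
+ * Transmit one multicast frame: drop it if the broadcast group is not
+ * attached yet, create a send-only group on first use of an MGID, queue
+ * the skb while a join is still in flight, and post to the group's
+ * address handle once the join has completed.
+ */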
+void xve_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct xve_mcast *mcast;
+
+       if (!test_bit(XVE_FLAG_OPER_UP, &priv->flags) ||
+           !priv->broadcast ||
+           !test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
+               INC_TX_DROP_STATS(priv, dev);
+               dev_kfree_skb_any(skb);
+               return;
+       }
+
+       mcast = __xve_mcast_find(dev, mgid);
+       if (!mcast) {
+               /* Let's create a new send only group now */
+               xve_dbg_mcast(priv,
+                             "setting up send only multicast group for %pI6\n",
+                             mgid);
+
+               mcast = xve_mcast_alloc(dev, 0);
+               if (!mcast) {
+                       xve_warn(priv,
+                                "unable to allocate memory for multicast structure\n");
+                       INC_TX_DROP_STATS(priv, dev);
+                       dev_kfree_skb_any(skb);
+                       goto out;
+               }
+
+               set_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags);
+               memcpy(mcast->mcmember.mgid.raw, mgid, sizeof(union ib_gid));
+               __xve_mcast_add(dev, mcast);
+               list_add_tail(&mcast->list, &priv->multicast_list);
+       }
+
+       if (!mcast->ah) {
+               if (skb_queue_len(&mcast->pkt_queue) < XVE_MAX_MCAST_QUEUE)
+                       skb_queue_tail(&mcast->pkt_queue, skb);
+               else {
+                       INC_TX_DROP_STATS(priv, dev);
+                       dev_kfree_skb_any(skb);
+               }
+
+               if (test_bit(XVE_MCAST_FLAG_BUSY, &mcast->flags)) {
+                       xve_dbg_mcast(priv,
+                                     "no address vector, but mcast join already started\n");
+               }
+               if (test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags))
+                       xve_mcast_sendonly_join(mcast);
+               /*
+                * If lookup completes between here and out:, don't
+                * want to send packet twice.
+                */
+               mcast = NULL;
+       }
+
+out:
+       if (mcast && mcast->ah) {
+               xve_test("%s about to send mcast %02x%02x%02x%02x%02x%02x",
+                        __func__, skb->data[0], skb->data[1], skb->data[2],
+                        skb->data[3], skb->data[4], skb->data[5]);
+               xve_test("ah=%p proto=%02x%02x for %s\n", mcast->ah->ah,
+                        skb->data[12], skb->data[13], dev->name);
+               xve_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
+       }
+}
+
+void xve_mcast_carrier_on_task(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_MCASTON, 0);
+       struct ib_port_attr attr;
+
+       if (ib_query_port(priv->ca, priv->port, &attr) ||
+           attr.state != IB_PORT_ACTIVE) {
+               priv->counters[XVE_IB_PORT_NOT_ACTIVE]++;
+               xve_dbg_mcast(priv,
+                             "%s Keeping carrier off until IB port is active\n",
+                             __func__);
+               xve_put_ctx(priv);
+               return;
+       }
+
+       priv->counters[XVE_MCAST_CARRIER_TASK]++;
+       /*
+        * Take rtnl_lock to avoid racing with xve_stop() and
+        * turning the carrier back on while a device is being
+        * removed.
+        */
+       rtnl_lock();
+       if (!netif_carrier_ok(priv->netdev) && priv->broadcast &&
+           (test_bit(XVE_MCAST_FLAG_ATTACHED, &priv->broadcast->flags))) {
+               xve_dbg_mcast(priv, "XVE: %s Sending netif carrier on to %s\n",
+                             __func__, priv->xve_name);
+               handle_carrier_state(priv, 1);
+       }
+       rtnl_unlock();
+       xve_put_ctx(priv);
+}
+
+void xve_mcast_dev_flush(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       LIST_HEAD(remove_list);
+       struct xve_mcast *mcast, *tmcast;
+       unsigned long flags;
+
+       xve_dbg_mcast(priv, "flushing multicast list\n");
+
+       spin_lock_irqsave(&priv->lock, flags);
+       list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
+               list_del(&mcast->list);
+               rb_erase(&mcast->rb_node, &priv->multicast_tree);
+               list_add_tail(&mcast->list, &remove_list);
+               mcast->used = jiffies;
+       }
+
+       if (priv->broadcast) {
+               rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+               list_add_tail(&priv->broadcast->list, &remove_list);
+               priv->broadcast = NULL;
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+               mcast->used = jiffies;
+               xve_mcast_leave(dev, mcast);
+               xve_mcast_free(mcast);
+       }
+}
+
+void xve_mcast_restart_task(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_MCASTRESTART, 0);
+       struct net_device *dev = priv->netdev;
+
+       xve_dbg_mcast(priv, "%s restarting mcast thread, state[%ld]\n",
+                     __func__, priv->flags);
+       xve_mcast_stop_thread(dev, 0);
+       xve_mcast_start_thread(dev);
+       xve_put_ctx(priv);
+}
+
+void xve_mcast_leave_task(struct work_struct *work)
+{
+       struct xve_dev_priv *priv =
+           xve_get_wqctx(work, XVE_WQ_FINISH_MCASTLEAVE, 2);
+       struct net_device *dev = priv->netdev;
+       struct xve_mcast *mcast, *tmcast;
+       LIST_HEAD(remove_list);
+
+       if (!test_bit(XVE_MCAST_RUN_GC, &priv->flags))
+               return;
+
+       priv->counters[XVE_MCAST_LEAVE_TASK]++;
+
+       if (xve_mc_sendonly_timeout > 0) {
+               list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list,
+                                        list) {
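+                       /* Reap send-only groups that have been idle
+                        * longer than xve_mc_sendonly_timeout seconds. */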
+                       if (test_bit(XVE_MCAST_FLAG_SENDONLY, &mcast->flags)
+                           && time_before(mcast->used,
+                                          jiffies -
+                                          xve_mc_sendonly_timeout * HZ)) {
+                               rb_erase(&mcast->rb_node,
+                                        &priv->multicast_tree);
+                               list_move_tail(&mcast->list, &remove_list);
+                       }
+               }
+
+               list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+                       xve_mcast_leave(dev, mcast);
+                       xve_mcast_free(mcast);
+               }
+       }
+
+       xve_queue_complete_work(priv, XVE_WQ_START_MCASTLEAVE, 60 * HZ);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_stats.c b/drivers/infiniband/ulp/xsigo/xve/xve_stats.c
new file mode 100755 (executable)
index 0000000..632aef3
--- /dev/null
@@ -0,0 +1,889 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+static int xs_seq_file;
+module_param(xs_seq_file, int, 0644);
+
+MODULE_PARM_DESC(xs_seq_file,
+                "Enable seq_file interface to print large data from /proc entries");
+
+static char *glob_counter_name[XVE_MAX_GLOB_COUNTERS] = {
+       "sync end del count:\t\t",
+       "vnic install count:\t\t",
+       "vnic del count:\t\t\t",
+       "vnic del novid count:\t\t",
+       "vnic update count:\t\t",
+       "vnic sync begin count:\t\t",
+       "vnic sync end count:\t\t",
+       "vnic oper req count:\t\t",
+       "vnic unsup cmd count:\t\t",
+       "iscsi info count:\t\t",
+       "xscore device remove count:\t",
+       "vnic stats req count:\t",
+       "number of pages allocated:\t",
+};
+
+static char *counter_name[XVE_MAX_COUNTERS] = {
+       "heartbeat_count:\t\t",
+       "hbeat send error count:\t\t",
+       "state_machine count:\t\t",
+       "state_machine_up count:\t\t",
+       "state_machine_down count:\t",
+       "napi_poll_count:\t\t",
+       "short_tx_pkt_count:\t\t",
+       "tx_skb_count:\t\t\t",
+       "tx skb free count:\t\t",
+       "tx vlan count:\t\t\t",
+       "tx error count:\t\t\t",
+       "tx wrb exhaust:\t\t\t",
+       "tx drop oper down count:\t",
+       "tx drop skb error count:\t",
+       "tx drop ring full count:\t",
+       "tx wake up count\t\t",
+       "tx queue stop count:\t\t",
+       "rx_skb_count:\t\t\t",
+       "rx_skb_alloc_count:\t\t",
+       "rx_smallskb_alloc_count:\t",
+       "rx_skb_freed_count:\t\t",
+       "rx skb offload count:\t\t",
+       "rx skb offl frag count:\t\t",
+       "rx skb offlnonipv4 count:\t",
+       "rx error count:\t\t\t",
+       "rx quota exceeded count:\t",
+       "rx no buf count:\t\t",
+       "napi sched count:\t\t",
+       "napi notsched count:\t\t",
+       "napi resched count:\t\t",
+       "open count:\t\t\t",
+       "stop count:\t\t\t",
+       "getstats count:\t\t\t",
+       "set mcast count:\t\t",
+       "vlan add count:\t\t\t",
+       "vlan del count:\t\t\t",
+       "ioctl count:\t\t\t",
+       "wdog timeout count:\t\t",
+       "oper req count:\t\t\t",
+       "admin up count:\t\t\t",
+       "admin down count:\t\t",
+       "sm poll count:\t\t\t",
+       "qp error count:\t\t\t",
+       "IB recovery count:\t\t",
+       "IB recovered count:\t\t",
+       "IB link down count:\t\t",
+       "IB link up count:\t\t",
+       "IB HCA port not active:\t\t",
+       "sent oper up count:\t\t",
+       "sent oper down count:\t\t",
+       "sent oper state failure count:\t",
+       "sent oper state success count:\t",
+       "drop standby count:\t\t",
+       "mac learn count:\t\t",
+       "mac aged count:\t\t\t",
+       "mac aged check count:\t\t",
+       "mac aged match not found:\t",
+       "mac aged still in use:\t\t",
+       "mac moved count:\t\t",
+       "mcast join task count:\t\t",
+       "mcast leave task count:\t\t",
+       "mcast carrier task count:\t",
+       "tx ud count:\t\t\t",
+       "tx rc count:\t\t\t",
+       "tx mcast count:\t\t\t",
+       "tx arp count:\t\t\t",
+       "tx ndp count:\t\t\t",
+       "tx arp vlan count:\t\t",
+       "tx ndp vlan count:\t\t",
+       "tx ud flood count:\t\t",
+       "tx rc flood count:\t\t",
+       "tx queue count:\t\t\t",
+       "tx path not found:\t\t",
+       "rx path not setup:\t\t",
+       "tx ah not found:\t\t",
+       "pathrec query count:\t\t",
+       "pathrec resp count:\t\t",
+       "pathrec resp err count:\t\t",
+       "ib sm_change count:\t\t",
+       "ib client_reregister count:\t",
+       "ib port_err count:\t\t",
+       "ib port_active count:\t\t",
+       "ib lid_active count:\t\t",
+       "ib pkey_change count:\t\t",
+       "ib invalid count:\t\t",
+};
+
+static char *misc_counter_name[XVE_MISC_MAX_COUNTERS] = {
+       "start  pkey poll:\t\t",
+       "complete  pkey poll:\t\t",
+       "start ah reap:\t\t\t",
+       "complete reap:\t\t\t",
+       "start fwt_aging:\t\t",
+       "complete fwt_aging:\t\t",
+       "start mcast join:\t\t",
+       "complete mcast join\t\t",
+       "start mcast leave:\t\t",
+       "complete mcast leave:\t\t",
+       "start mcast on:\t\t\t",
+       "complete mcast on:\t\t",
+       "start mcast restart:\t\t",
+       "complete mcast restart:\t\t",
+       "start  flush light:\t\t",
+       "complete  flush light:\t\t",
+       "start  flush normal:\t\t",
+       "complete flush normal:\t\t",
+       "start  flush heavy:\t\t",
+       "complete flush heavy:\t\t",
+       "start  cm stale:\t\t",
+       "complete cm stale:\t\t",
+       "start  cm tx start:\t\t",
+       "complete cm work start:\t\t",
+       "start  cm tx reap:\t\t",
+       "complete cm work tx reap:\t",
+       "start  cm rx reap:\t\t",
+       "complete cm work rx reap:\t",
+       "Workqueue not scheded:\t\t",
+       "Workqueue sched invalid:\t",
+       "WorkQueue sched failed:\t\t",
+};
+
+#define atoi(str)      simple_strtoul(((str) != NULL) ? (str) : "", NULL, 10)
+#define XS_RESCHED_NAPI                "napi_sched"
+#define XS_READIB_BUF          "read_ibbuf"
+#define XS_RXBATCHING_ON       "rbatch on"
+#define XS_RXBATCHING_OFF      "rbatch off"
+
+struct proc_dir_entry *proc_root_xve;
+struct proc_dir_entry *proc_root_xve_dev;
+
+static int xve_proc_open_device(struct inode *inode, struct file *file);
+static int xve_proc_read_device(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_device(struct file *file,
+                                    const char __user *buffer, size_t count,
+                                    loff_t *offp);
+static int xve_proc_open_debug(struct inode *inode, struct file *file);
+static int xve_proc_read_debug(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_debug(struct file *file,
+                                   const char __user *buffer, size_t count,
+                                   loff_t *offp);
+static void *xve_seq_start(struct seq_file *seq, loff_t *pos);
+static void *xve_seq_next(struct seq_file *seq, void *v, loff_t *pos);
+static int xve_seq_show(struct seq_file *seq, void *v);
+static void xve_seq_stop(struct seq_file *seq, void *v);
+static int xve_seq_open(struct inode *inode, struct file *sfile);
+static int xve_proc_open_device_counters(struct inode *inode,
+                                        struct file *file);
+static int xve_proc_read_device_counters(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_device_counters(struct file *file,
+                                             const char __user *buffer,
+                                             size_t count, loff_t *offp);
+static int xve_proc_l2_open_device(struct inode *inode, struct file *file);
+static int xve_proc_l2_read_device(struct seq_file *m, void *data);
+static ssize_t xve_proc_l2_write_device(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp);
+static int xve_proc_open_l2_flush(struct inode *inode, struct file *file);
+static int xve_proc_read_l2_flush(struct seq_file *m, void *data);
+static ssize_t xve_proc_write_l2_flush(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp);
+
+static const struct file_operations xve_debug_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_proc_open_debug,
+       .read = seq_read,
+       .write = xve_proc_write_debug,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xve_device_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_proc_open_device,
+       .read = seq_read,
+       .write = xve_proc_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xve_device_counters_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_proc_open_device_counters,
+       .read = seq_read,
+       .write = xve_proc_write_device_counters,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static const struct file_operations xve_file_ops = {
+       .owner = THIS_MODULE,
+       .open = xve_seq_open,
+       .read = seq_read,
+       .write = xve_proc_write_device_counters,
+       .llseek = seq_lseek,
+       .release = seq_release,
+};
+
+static const struct seq_operations xve_seq_ops = {
+       .start = xve_seq_start,
+       .next = xve_seq_next,
+       .stop = xve_seq_stop,
+       .show = xve_seq_show
+};
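+
+#if 0
+/*
+ * Iterator contract behind xve_seq_ops above: seq_read() drives
+ * start(pos) -> show(v) -> next(v, pos) ... -> stop(v), and a NULL
+ * return from start()/next() ends the walk.  Minimal shape, assuming a
+ * walk over the counter_name[] table (illustrative; the real xve_seq_*
+ * callbacks are defined later in this file):
+ */
+static void *demo_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return *pos < XVE_MAX_COUNTERS ? pos : NULL;
+}
+
+static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       return ++*pos < XVE_MAX_COUNTERS ? pos : NULL;
+}
+
+static int demo_seq_show(struct seq_file *seq, void *v)
+{
+       seq_printf(seq, "%s\n", counter_name[*(loff_t *)v]);
+       return 0;
+}
+
+static void demo_seq_stop(struct seq_file *seq, void *v)
+{
+}
+#endif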
+
+static const struct file_operations xve_l2_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_proc_l2_open_device,
+       .read = seq_read,
+       .write = xve_proc_l2_write_device,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static const struct file_operations xve_l2_flush_proc_fops = {
+       .owner = THIS_MODULE,
+       .open = xve_proc_open_l2_flush,
+       .read = seq_read,
+       .write = xve_proc_write_l2_flush,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static int xve_proc_l2_read_device(struct seq_file *m, void *data)
+{
+       struct xve_fwt_entry *fwt_entry;
+       struct xve_dev_priv *vp = m->private;
+       struct xve_fwt_s *xve_fwt;
+       struct hlist_head *head;
+       struct hlist_node *n;
+       int i, j, k;
+       char tmp_buf[512];
+       char *smac;
+
+       xve_fwt = &vp->xve_fwt;
+       seq_puts(m,
+                "Id\tVLAN\tHash\tMAC\t\t\tGUID\t\t\tCMState\t\tQP\tVersion\t\tTx Mb/s\tRx Mb/s\n");
+       seq_puts(m,
+                "=======================================================");
+       seq_puts(m, "==========================");
+       seq_puts(m, "====================================================\n");
+
+       for (i = vp->sindex, j = vp->jindex; i < XVE_FWT_HASH_LISTS; i++) {
+               head = &xve_fwt->fwt[i];
+               k = 0;
+               hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+                       if (xve_fwt_entry_valid(xve_fwt, fwt_entry)) {
+                               char *cmstr = "Not Connected";
+                               u16 printed = 0;
+                               struct xve_cm_ctx *tx = NULL, *rx = NULL;
+
+                               j++;
+                               k++;
+                               smac = fwt_entry->smac_addr;
+                               tmp_buf[0] = 0;
+                               print_mgid_buf(tmp_buf,
+                                              (char *)(fwt_entry->dgid.raw));
+                               if (fwt_entry->path) {
+                                       tx = xve_cmtx_get(fwt_entry->path);
+                                       rx = xve_cmrx_get(fwt_entry->path);
+                                       if (tx) {
+                                               u32 rx_rate = 0;
+
+                                               if (test_bit
+                                                   (XVE_FLAG_OPER_UP,
+                                                    &tx->flags))
+                                                       cmstr = "Connected";
+                                               if (rx)
+                                                       rx_rate =
+                                                           rx->stats.rx_rate;
+                                               seq_printf(m,
+                                                          "%d\t%d\t%d\t%2x:%2x:%2x:%2x:%2x:%2x\t%s\t%s\t%x\t%s\t%d\t%d\n",
+                                                          j, fwt_entry->vlan,
+                                                          fwt_entry->hash_value,
+                                                          ALIGN_TO_FF(smac[0]),
+                                                          ALIGN_TO_FF(smac[1]),
+                                                          ALIGN_TO_FF(smac[2]),
+                                                          ALIGN_TO_FF(smac[3]),
+                                                          ALIGN_TO_FF(smac[4]),
+                                                          ALIGN_TO_FF(smac[5]),
+                                                          tmp_buf + 8, cmstr,
+                                                          tx->qp ? tx->qp->qp_num : 0,
+                                                          tx->version,
+                                                          tx->stats.tx_rate,
+                                                          rx_rate);
+                                               printed = 1;
+                                       }
+                               }
+
+                               if (!printed) {
+                                       char buffer[512];
+
+                                       buffer[0] = 0;
+                                       sprintf(buffer,
+                                               "NC Path-%s CM(Tx-%s Rx-%s) ",
+                                               fwt_entry->path ? "Yes" : "No",
+                                               tx ? "Yes" : "No",
+                                               rx ? "Yes" : "No");
+                                       seq_printf(m,
+                                                  "%d\t%d\t%d\t%02x:%02x:%02x:%02x:%02x:%02x\t%s\t%s\n",
+                                                  j, fwt_entry->vlan,
+                                                  fwt_entry->hash_value,
+                                                  ALIGN_TO_FF(smac[0]),
+                                                  ALIGN_TO_FF(smac[1]),
+                                                  ALIGN_TO_FF(smac[2]),
+                                                  ALIGN_TO_FF(smac[3]),
+                                                  ALIGN_TO_FF(smac[4]),
+                                                  ALIGN_TO_FF(smac[5]),
+                                                  tmp_buf + 8, buffer);
+                               }
+                               xve_fwt_put_ctx(&vp->xve_fwt, fwt_entry);
+                       }
+               }
+
+       }
+
+       if (i >= XVE_FWT_HASH_LISTS) {
+               vp->sindex = 0;
+               vp->jindex = 0;
+               seq_puts(m, "\n End of L2 Table\n");
+       } else {
+               seq_puts(m, "\n Table incomplete\n");
+               vp->sindex = i;
+               vp->jindex = j;
+       }
+       return 0;
+}
+
+static ssize_t xve_proc_l2_write_device(struct file *file,
+                                       const char __user *buffer,
+                                       size_t count, loff_t *offp)
+{
+       return count;
+}
+
+static int xve_proc_l2_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, xve_proc_l2_read_device, PDE_DATA(inode));
+}
+
+static int xve_proc_read_device(struct seq_file *m, void *data)
+{
+       struct xve_dev_priv *vp = m->private;
+       int i;
+       unsigned long tsecs = 0, tmins = 0, thrs = 0;
+       char tmp_buf[512];
+       char *bcast_mgid_token = vp->bcast_mgid.raw;
+       char *local_gid_token = vp->local_gid.raw;
+
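+       /*
+        * The device report is emitted in two passes: the first read
+        * prints the configuration/state report and arms next_page, the
+        * following read dumps the misc counters and clears it again.
+        */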
+       if (xve_get_misc_info()) {
+               if (vp->next_page) {
+                       for (i = 0; i < XVE_MISC_MAX_COUNTERS; i++)
+                               seq_printf(m, "%s%u\n", misc_counter_name[i],
+                                          vp->misc_counters[i]);
+                       vp->next_page = 0;
+                       goto out;
+               }
+       }
+
+       seq_printf(m, "Chassis Name:\t\t\t%s\n", vp->xsmp_info.chassis_name);
+       seq_printf(m, "Chassis Version  :\t\t%x\n", vp->xsmp_info.version);
+       seq_printf(m, "Server-Profile:\t\t\t%s\n", vp->xsmp_info.session_name);
+       seq_puts(m, "Config parameters:\n");
+       seq_printf(m, "Mode :\t\t\t\t%s\n", vp->mode);
+       seq_printf(m, "Netid :\t\t\t\t0x%x\n", vp->net_id);
+       if (vp->qp)
+               seq_printf(m, "UD Queue pair Number(QP):\t%d\n",
+                          vp->qp->qp_num);
+       else
+               seq_puts(m, "UD Queue pair Number(QP):\tNot established yet\n");
+
+       seq_printf(m, "PortDetails:\t\t\tPort:%d pkey:%d  pkey_index:%d\n",
+                  vp->port, vp->pkey, vp->pkey_index);
+
+       tmp_buf[0] = 0;
+       print_mgid_buf(tmp_buf, bcast_mgid_token);
+       seq_printf(m, "Bcast Mgid:\t\t\t%s\n", tmp_buf);
+
+       tmp_buf[0] = 0;
+       print_mgid_buf(tmp_buf, local_gid_token);
+
+       seq_printf(m, "Local gid:\t\t\t%s\n", tmp_buf);
+       seq_printf(m, "MAC addr:\t\t\t0x%Lx\n", vp->mac);
+       seq_printf(m, "VID:\t\t\t\t0x%Lx\n", vp->resource_id);
+       seq_printf(m, "mtu:\t\t\t\t%d\n", vp->netdev->mtu);
+       seq_printf(m, "Admin mtu:\t\t\t%d\n", vp->admin_mtu);
+       seq_printf(m, "MCAST mtu:\t\t\t%d\n", vp->mcast_mtu);
+       seq_printf(m, "IB MAX MTU: \t\t\t%d\n", vp->max_ib_mtu);
+
+       seq_printf(m, "Receive Queue size: \t\t%d\n", xve_recvq_size);
+       seq_printf(m, "Transmit Queue size: \t\t%d\n", xve_sendq_size);
+
+       if (vp->cm_supported) {
+               seq_printf(m, "Num of cm frags: \t\t%d\n", vp->cm.num_frags);
+               seq_printf(m, "CM mtu  \t\t\t%d\n", vp->cm.max_cm_mtu);
+       }
+
+       seq_puts(m, "\n");
+       seq_printf(m, "link/xsmp hndl:\t\t\t%p\n", vp->xsmp_hndl);
+       seq_printf(m, "Port link state: \t\t%s\n",
+                  test_bit(XVE_PORT_LINK_UP, &vp->state) ? "Up" : "Down");
+
+       if (vp->broadcast) {
+               seq_puts(m, "Multicast Report:\n");
+               seq_printf(m, "Flag:\t\t\t\t%lx\n", vp->broadcast->flags);
+               seq_printf(m, "Join state:\t\t\t%s\n",
+                          test_bit(XVE_MCAST_FLAG_ATTACHED,
+                                   &vp->broadcast->flags)
+                          ? "Joined" : "Not joined");
+       } else {
+               seq_puts(m, "Multicast group not created\n");
+       }
+
+       strcpy(tmp_buf, "None");
+       if (vp->mp_flag & MP_XVE_PRIMARY) {
+               strcpy(tmp_buf, "Primary");
+               if (vp->mp_flag & MP_XVE_AUTO_SWITCH)
+                       strcat(tmp_buf, " + AutoSwitchover");
+       } else if (vp->mp_flag & MP_XVE_SECONDARY) {
+               strcpy(tmp_buf, "Secondary");
+               if (vp->mp_flag & MP_XVE_AUTO_SWITCH)
+                       strcat(tmp_buf, " + AutoSwitchover");
+       }
+
+       seq_printf(m, "HA flags:\t\t\t%s\n", tmp_buf);
+       seq_printf(m, "TSO:\t\t\t\t%s\n",
+                  (vp->netdev->
+                   features & NETIF_F_TSO) ? "Enabled" : "Disabled");
+       seq_printf(m, "LRO:\t\t\t\t%s\n",
+                  (vp->netdev->
+                   features & NETIF_F_LRO) ? "Enabled" : "Disabled");
+
+       if (test_bit(XVE_OPER_REP_SENT, &vp->state)) {
+               tsecs = jiffies_to_msecs(jiffies - vp->jiffies) / 1000;
+               tsecs = jiffies_to_msecs(jiffies - vp->jiffies) / 1000;
+               thrs = tsecs / (60 * 60);
+               tmins = (tsecs / 60 - (thrs * 60));
+               tsecs = tsecs - (tmins * 60) - (thrs * 60 * 60);
+       }
+
+       seq_printf(m, "XVE Uptime:\t\t\t%lu hrs %lu mins %lu seconds\n",
+                  thrs, tmins, tsecs);
+       seq_puts(m, "\n");
+
+       seq_printf(m, "Netdev state:\t\t\t0x%lu\n", vp->netdev->state);
+       seq_printf(m, "Netdev napi state:\t\t0x%lu\n", vp->napi.state);
+       seq_printf(m, "VNIC state:\t\t\t0x%x\n", (unsigned int)vp->state);
+       seq_printf(m, "VNIC Flag:\t\t\t0x%x\n", (unsigned int)vp->flags);
+
+       tmp_buf[0] = 0;
+       if (netif_running(vp->netdev))
+               strcat(tmp_buf, "dev running");
+       else
+               strcat(tmp_buf, "netif not running");
+       if (netif_queue_stopped(vp->netdev))
+               strcat(tmp_buf, " + dev stopped");
+       else
+               strcat(tmp_buf, " + dev not stopped");
+
+       seq_printf(m, "%s\n\n", tmp_buf);
+
+       seq_printf(m, "Carrier state:\t\t\t%s\n",
+                  netif_carrier_ok(vp->netdev) ? "Up" : "Down");
+
+       seq_printf(m, "VNIC up:\t\t\t%s\n",
+                  test_bit(XVE_OPER_UP, &vp->state) ? "Yes" : "No");
+
+       tmp_buf[0] = 0;
+       if (test_bit(XVE_OPER_UP, &vp->state))
+               strcat(tmp_buf, "Oper Up");
+       else
+               strcat(tmp_buf, "Oper Down");
+       if (test_bit(XVE_OS_ADMIN_UP, &vp->state))
+               strcat(tmp_buf, " + OS Admin Up");
+       else
+               strcat(tmp_buf, " + OS Admin Down");
+       if (test_bit(XVE_PORT_LINK_UP, &vp->state))
+               strcat(tmp_buf, " + Port Link Up");
+       else
+               strcat(tmp_buf, " + Port Link Down");
+       if (test_bit(XVE_OPER_REP_SENT, &vp->state))
+               strcat(tmp_buf, " + Oper Sent");
+       else
+               strcat(tmp_buf, " + No Oper Rep");
+
+       if (test_bit(XVE_INTR_ENABLED, &vp->state))
+               strcat(tmp_buf, " + Rx Intr Enabled");
+       else
+               strcat(tmp_buf, " + Rx Intr Disabled");
+
+       if (test_bit(XVE_RX_NOBUF, &vp->state))
+               strcat(tmp_buf, " + Rx No Buf");
+
+       if (test_bit(XVE_IBLINK_DOWN, &vp->state))
+               strcat(tmp_buf, " +  IB Link Down");
+       else
+               strcat(tmp_buf, " +  IB Link Up");
+
+       if (test_bit(XVE_IB_DEV_OPEN, &vp->flags))
+               strcat(tmp_buf, " +  IB Device Opened");
+       else
+               strcat(tmp_buf, " +  IB Device Not Opened");
+
+       if (test_bit(XVE_OVER_QUOTA, &vp->state))
+               strcat(tmp_buf, " +  No RX Quota");
+
+       seq_printf(m, "%s\n\n", tmp_buf);
+
+       if (vp->work_queue_failed != 0)
+               seq_printf(m, "WQ Failed:\t\t\t%ld\n", vp->work_queue_failed);
+
+       seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+       vp->next_page = 1;
+out:
+       return 0;
+}
+
+static ssize_t xve_proc_write_device(struct file *file,
+                                    const char __user *buffer, size_t count,
+                                    loff_t *offp)
+{
+       struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
+       char action[64];
+       size_t len = min(count, sizeof(action) - 1);
+
+       /* buffer is a user pointer: copy it in before parsing */
+       if (copy_from_user(action, buffer, len))
+               return -EFAULT;
+       action[len] = '\0';
+       /* strip the trailing newline that echo(1) appends */
+       if (len && action[len - 1] == '\n')
+               action[len - 1] = '\0';
+
+       if (strlen(action) == 1 && action[0] == '0') {
+               /* Clear counters */
+               memset(vp->counters, 0, sizeof(vp->counters));
+               vp->counters_cleared++;
+               return count;
+       }
+
+       /*
+        * Commands such as "rbatch on" contain spaces, so compare the
+        * whole copied string rather than a single sscanf token.
+        */
+       if (strcmp(action, XS_RESCHED_NAPI) == 0)
+               set_bit(XVE_TRIGGER_NAPI_SCHED, &vp->state);
+
+       return count;
+}
+
+static int xve_proc_open_device(struct inode *inode, struct file *file)
+{
+       return single_open(file, xve_proc_read_device, PDE_DATA(inode));
+}
+
+static int xve_proc_read_device_counters(struct seq_file *m, void *data)
+{
+       struct xve_dev_priv *vp = (struct xve_dev_priv *)m->private;
+       int i;
+
+       for (i = 0; i < XVE_MAX_COUNTERS; i++)
+               seq_printf(m, "%s%u\n", counter_name[i], vp->counters[i]);
+       seq_printf(m, "Counters cleared count:\t\t%u\n", vp->counters_cleared);
+
+       return 0;
+}
+
+static ssize_t xve_proc_write_device_counters(struct file *file,
+                                             const char __user *buffer,
+                                             size_t count, loff_t *offp)
+{
+       struct xve_dev_priv *vp = PDE_DATA(file_inode(file));
+       int newval;
+       ssize_t ret;
+       char *buf = (char *)__get_free_page(GFP_USER);
+
+       if (!buf)
+               return -ENOMEM;
+
+       ret = -EINVAL;
+       if (!count || count >= PAGE_SIZE)
+               goto out;
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+
+       if (kstrtoint(buf, 0, &newval))
+               goto out;
+
+       if (newval == 0) {
+               /* Clear counters */
+               memset(vp->counters, 0, sizeof(vp->counters));
+               vp->counters_cleared++;
+       }
+       ret = count;
+out:
+       /* free the scratch page on every path, not only on failure */
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int xve_proc_open_device_counters(struct inode *inode, struct file *file)
+{
+       return single_open(file, xve_proc_read_device_counters,
+                       PDE_DATA(inode));
+}
+
+static int xve_proc_read_l2_flush(struct seq_file *m, void *data)
+{
+       seq_puts(m, "flush: Nothing to read\n");
+       return 0;
+}
+
+static ssize_t xve_proc_write_l2_flush(struct file *file,
+                                      const char __user *buffer, size_t count,
+                                      loff_t *offp)
+{
+       struct xve_dev_priv *priv = PDE_DATA(file_inode(file));
+
+       pr_info("%s XVE flushing l2 %s\n", __func__, priv->xve_name);
+       xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+
+       return count;
+}
+
+static int xve_proc_open_l2_flush(struct inode *inode, struct file *file)
+{
+       return single_open(file, xve_proc_read_l2_flush, PDE_DATA(inode));
+}
+
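+/*
+ * seq_file iterator over the per-device counter table: start/next walk
+ * counter_name[] by position and return NULL once XVE_MAX_COUNTERS
+ * entries have been produced, which ends the sequence.
+ */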
+static void *xve_seq_start(struct seq_file *seq, loff_t *pos)
+{
+       return (*pos < XVE_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static void *xve_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+       ++*pos;
+       return (*pos < XVE_MAX_COUNTERS) ? &counter_name[*pos] : NULL;
+}
+
+static int xve_seq_show(struct seq_file *seq, void *v)
+{
+       struct xve_dev_priv *vp = seq->private;
+
+       if (vp->ix == XVE_MAX_COUNTERS)
+               vp->ix = 0;
+
+       seq_printf(seq, "%s %u\n", counter_name[vp->ix], vp->counters[vp->ix]);
+       vp->ix++;
+
+       return 0;
+}
+
+static void xve_seq_stop(struct seq_file *seq, void *v)
+{
+       /* Nothing to be done here */
+}
+
+static int xve_seq_open(struct inode *inode, struct file *sfile)
+{
+       struct seq_file *seq;
+       int ret_val;
+
+       ret_val = seq_open(sfile, &xve_seq_ops);
+       if (!ret_val) {
+               /* recover the pointer buried in proc_dir_entry data */
+               seq = sfile->private_data;
+               seq->private = PDE_DATA(inode);
+       }
+
+       return ret_val;
+}
+
+int xve_add_proc_entry(struct xve_dev_priv *vp)
+{
+       struct proc_dir_entry *file, *l2, *flush, *counter;
+       int ret = 0;
+
+       vp->nic_dir = xg_create_proc_entry(vp->proc_name, S_IFDIR,
+                                          proc_root_xve_dev, 1);
+
+       if (!vp->nic_dir) {
+               pr_info("Unable to create the xve nic entry\n");
+               return -ENOMEM;
+       }
+       file = proc_create_data(vp->xve_name, S_IFREG, vp->nic_dir,
+                               &xve_device_proc_fops, vp);
+       if (!file) {
+               pr_info("Unable to create the xve /proc entry\n");
+               ret = -ENOMEM;
+               goto err_dev_entry;
+       }
+       if (xs_seq_file) {
+               /* Using proc seq_file for OVM */
+               counter = proc_create_data("counters", S_IFREG, vp->nic_dir,
+                                          &xve_file_ops, vp);
+       } else
+               counter = proc_create_data("counters", S_IFREG, vp->nic_dir,
+                                          &xve_device_counters_proc_fops, vp);
+       if (!counter) {
+               pr_info("Unable to create the xve /proc counters entry\n");
+               ret = -ENOMEM;
+               goto err_counter;
+       }
+
+       l2 = proc_create_data("l2table", S_IFREG, vp->nic_dir,
+                             &xve_l2_proc_fops, vp);
+       if (!l2) {
+               pr_info("Unable to create the xve /proc l2 entry\n");
+               ret = -ENOMEM;
+               goto err_l2table;
+       }
+       /*
+        * Create flush entry
+        */
+       flush = proc_create_data("flush_l2", S_IFREG, vp->nic_dir,
+                                &xve_l2_flush_proc_fops, vp);
+       if (!flush) {
+               pr_info("Unable to create the xve /proc flush entry\n");
+               ret = -ENOMEM;
+               goto err_flush;
+       }
+       return 0;
+err_flush:
+       remove_proc_entry("l2table", vp->nic_dir);
+err_l2table:
+       remove_proc_entry("counters", vp->nic_dir);
+err_counter:
+       remove_proc_entry(vp->xve_name, vp->nic_dir);
+err_dev_entry:
+       remove_proc_entry(vp->proc_name, proc_root_xve_dev);
+       return ret;
+}
+
+void xve_remove_proc_entry(struct xve_dev_priv *vp)
+{
+       remove_proc_entry("counters", vp->nic_dir);
+       remove_proc_entry("flush_l2", vp->nic_dir);
+       remove_proc_entry("l2table", vp->nic_dir);
+       remove_proc_entry(vp->xve_name, vp->nic_dir);
+       remove_proc_entry(vp->proc_name, proc_root_xve_dev);
+}
+
+static ssize_t xve_proc_write_debug(struct file *file,
+                                   const char __user *buffer, size_t count,
+                                   loff_t *offp)
+{
+       int newval;
+       ssize_t ret;
+       char *buf = (char *)__get_free_page(GFP_USER);
+
+       if (!buf)
+               return -ENOMEM;
+
+       ret = -EINVAL;
+       if (!count || count >= PAGE_SIZE)
+               goto out;
+       if (copy_from_user(buf, buffer, count)) {
+               ret = -EFAULT;
+               goto out;
+       }
+       buf[count] = '\0';
+
+       if (kstrtoint(buf, 0, &newval))
+               goto out;
+
+       xve_debug_level = newval;
+       ret = count;
+out:
+       /* free the scratch page on every path, not only on failure */
+       free_page((unsigned long)buf);
+       return ret;
+}
+
+static int xve_proc_read_debug(struct seq_file *m, void *data)
+{
+       int i;
+
+       seq_printf(m, "Debug bitmask: 0x%x\n\n", xve_debug_level);
+       for (i = 0; i < XVE_MAX_GLOB_COUNTERS; i++)
+               seq_printf(m, "%s%d\n", glob_counter_name[i], xve_counters[i]);
+       return 0;
+}
+
+static int xve_proc_open_debug(struct inode *inode, struct file *file)
+{
+       return single_open(file, xve_proc_read_debug, PDE_DATA(inode));
+}
+
+int xve_create_procfs_root_entries(void)
+{
+       struct proc_dir_entry *debug_file;
+       int ret = 0;
+
+       proc_root_xve =
+           xg_create_proc_entry("driver/xve", S_IFDIR, NULL, 0);
+
+       if (!proc_root_xve) {
+               pr_info("Unable to create /proc/driver/xve\n");
+               return -ENOMEM;
+       }
+
+       proc_root_xve_dev = xg_create_proc_entry("devices", S_IFDIR,
+                                                proc_root_xve, 1);
+       if (!proc_root_xve_dev) {
+               pr_info("Unable to create /proc/driver/xve/devices\n");
+               ret = -ENOMEM;
+               goto create_proc_end_1;
+       }
+       debug_file = proc_create_data("debug", S_IFREG, proc_root_xve,
+                                     &xve_debug_proc_fops, NULL);
+       if (!debug_file) {
+               pr_info("Unable to create /proc/driver/xve/debug\n");
+               ret = -ENOMEM;
+               goto create_proc_end_2;
+       }
+       return 0;
+
+create_proc_end_2:
+       remove_proc_entry("devices", proc_root_xve_dev);
+create_proc_end_1:
+       remove_proc_entry("driver/xve", NULL);
+       return ret;
+}
+
+void xve_remove_procfs_root_entries(void)
+{
+       remove_proc_entry("debug", proc_root_xve);
+       remove_proc_entry("devices", proc_root_xve);
+       xg_remove_proc_entry("driver/xve", NULL);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_tables.c b/drivers/infiniband/ulp/xsigo/xve/xve_tables.c
new file mode 100644 (file)
index 0000000..eea4854
--- /dev/null
@@ -0,0 +1,419 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
+#include <linux/pkt_sched.h>
+#include <linux/random.h>
+
+static int xve_age_path = 1;
+module_param(xve_age_path, int, 0644);
+MODULE_PARM_DESC(xve_age_path, "Enable/disable aging of a path once it has no fwt entries");
+
+u32 xve_hash_salt __read_mostly;
+static struct kmem_cache *xve_fwt_cache __read_mostly;
+
+struct xve_advert_hdr {
+       __be16 type;
+       __be16 count;
+       union ib_gid gid;
+       u32 qpn;
+} __attribute__ ((__packed__));
+
+int xve_tables_init(void)
+{
+       get_random_bytes(&xve_hash_salt, sizeof(xve_hash_salt));
+       xve_fwt_cache =
+           kmem_cache_create("xve_fwt_cache", sizeof(struct xve_fwt_entry), 0,
+                             SLAB_HWCACHE_ALIGN, NULL);
+       if (!xve_fwt_cache)
+               return -ENOMEM;
+
+       return 0;
+}
+
+void xve_fwt_init(struct xve_fwt_s *xve_fwt)
+{
+       int i;
+
+       spin_lock_init(&xve_fwt->lock);
+       for (i = 0; i < XVE_FWT_HASH_LISTS; i++)
+               INIT_HLIST_HEAD(&xve_fwt->fwt[i]);
+       xve_fwt->num = 0;
+}
+
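+/*
+ * Hash a (MAC, VLAN) pair into one of the forwarding-table buckets.
+ * The boot-time random salt makes bucket placement unpredictable, and
+ * the "& (size - 1)" mask assumes size is a power of two.
+ */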
+static int xve_mac_hash(const unsigned char *mac, int size, u16 vlan)
+{
+       return hash_bytes(mac, ETH_ALEN, vlan ^ xve_hash_salt) & (size - 1);
+}
+
+static struct xve_fwt_entry *xve_fwt_find_entry(struct hlist_head *head,
+                                               const unsigned char *mac,
+                                               u16 vlan)
+{
+       struct xve_fwt_entry *fwt_entry;
+
+       hlist_for_each_entry(fwt_entry, head, hlist) {
+               if (fwt_entry->vlan == vlan
+                   && ether_addr_equal(fwt_entry->smac_addr, mac))
+                       return fwt_entry;
+       }
+       return NULL;
+}
+
+static struct xve_fwt_entry *xve_fwt_find_valid(struct hlist_head *head)
+{
+       struct xve_fwt_entry *fwt_entry;
+
+       hlist_for_each_entry(fwt_entry, head, hlist) {
+               if (test_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state))
+                       return fwt_entry;
+       }
+       return NULL;
+}
+
+struct xve_fwt_entry *xve_fwt_list(struct xve_fwt_s *xve_fwt, int val)
+{
+       struct hlist_head *head;
+       struct xve_fwt_entry *fwt_entry = NULL;
+       unsigned long flags = 0;
+
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       head = &xve_fwt->fwt[val];
+       fwt_entry = xve_fwt_find_valid(head);
+       if (fwt_entry)
+               atomic_inc(&fwt_entry->ref_cnt);
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+       return fwt_entry;
+}
+
+bool xve_fwt_entry_valid(struct xve_fwt_s *xve_fwt,
+                        struct xve_fwt_entry *fwt_entry)
+{
+       bool ret = true;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       if ((fwt_entry != NULL)
+           && test_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state))
+               atomic_inc(&fwt_entry->ref_cnt);
+       else
+               ret = false;
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+
+       return ret;
+}
+
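+/*
+ * Age out forwarding-table entries: an entry whose REFRESH bit has not
+ * been set within priv->aging_delay is unlinked and freed, and if its
+ * path is left with no entries the path itself is flushed by GID
+ * (when xve_age_path is enabled).
+ */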
+int xve_aging_task_machine(struct xve_dev_priv *priv)
+{
+       unsigned long flags;
+       struct xve_fwt_entry *fwt_entry;
+       struct xve_path *path;
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       int i;
+       char *smac;
+       union ib_gid dgid;
+       int is_list_empty = 0;
+       struct hlist_head *head;
+       struct hlist_node *n;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (!test_bit(XVE_OS_ADMIN_UP, &priv->state) ||
+           test_bit(XVE_DELETING, &priv->state)) {
+               spin_unlock_irqrestore(&priv->lock, flags);
+               return 0;
+       }
+       spin_unlock_irqrestore(&priv->lock, flags);
+
+       priv->counters[XVE_MAC_AGED_CHECK]++;
+       for (i = 0; i < XVE_FWT_HASH_LISTS; i++) {
+               head = &xve_fwt->fwt[i];
+               hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+                       if (xve_fwt_entry_valid(xve_fwt, fwt_entry) == true) {
+                               smac = fwt_entry->smac_addr;
+                               if (!test_and_clear_bit
+                                   (XVE_FWT_ENTRY_REFRESH, &fwt_entry->state)
+                                   && ((jiffies - fwt_entry->last_refresh) >=
+                                       priv->aging_delay)) {
+                                       pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x vlan %d Aged out\n",
+                                               priv->xve_name,
+                                               ALIGN_TO_FF(smac[0]),
+                                               ALIGN_TO_FF(smac[1]),
+                                               ALIGN_TO_FF(smac[2]),
+                                               ALIGN_TO_FF(smac[3]),
+                                               ALIGN_TO_FF(smac[4]),
+                                               ALIGN_TO_FF(smac[5]),
+                                               fwt_entry->vlan);
+                                       /*
+                                        * Can there be a race here where path
+                                        *  becomes a bad address when paths
+                                        *  gets flushed??
+                                        */
+                                       spin_lock_irqsave(&priv->lock, flags);
+                                       xve_remove_fwt_entry(priv, fwt_entry);
+                                       path = fwt_entry->path;
+                                       if (path) {
+                                               memcpy(dgid.raw,
+                                                      path->pathrec.dgid.raw,
+                                                      sizeof(dgid));
+                                               if (list_empty(&path->fwt_list))
+                                                       is_list_empty = 1;
+                                       }
+                                       spin_unlock_irqrestore(&priv->lock,
+                                                              flags);
+                                       if (xve_age_path && is_list_empty)
+                                               xve_flush_single_path_by_gid
+                                                   (priv->netdev, &dgid);
+                                       xve_fwt_put_ctx(xve_fwt, fwt_entry);
+                                       xve_fwt_entry_free(priv, fwt_entry);
+                                       priv->counters[XVE_MAC_AGED_COUNTER]++;
+                               } else {
+                                       priv->counters[XVE_MAC_STILL_INUSE]++;
+                                       xve_fwt_put_ctx(xve_fwt, fwt_entry);
+                               }
+                       } else {
+                               priv->counters[XVE_MAC_AGED_NOMATCHES]++;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+struct xve_fwt_entry *xve_fwt_lookup(struct xve_fwt_s *xve_fwt, char *mac,
+                                    u16 vlan, int refresh)
+{
+       unsigned long flags;
+       struct hlist_head *head;
+       struct xve_fwt_entry *fwt_entry;
+
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       head = &xve_fwt->fwt[xve_mac_hash(mac, XVE_FWT_HASH_LISTS, vlan)];
+       fwt_entry = xve_fwt_find_entry(head, mac, vlan);
+       if (fwt_entry) {
+               atomic_inc(&fwt_entry->ref_cnt);
+               if (refresh)
+                       set_bit(XVE_FWT_ENTRY_REFRESH, &fwt_entry->state);
+               fwt_entry->last_refresh = jiffies;
+       } else {
+               xve_debug(DEBUG_TABLE_INFO, NULL,
+                         "%s No match for %02x%02x%02x%02x%02x%02x vlan %d\n",
+                         __func__, mac[0], mac[1], mac[2], mac[3], mac[4],
+                         mac[5], vlan);
+       }
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+       return fwt_entry;
+}
+
+void xve_fwt_put_ctx(struct xve_fwt_s *xve_fwt, struct xve_fwt_entry *fwt_entry)
+{
+       if (fwt_entry)
+               atomic_dec(&fwt_entry->ref_cnt);
+}
+
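+/*
+ * Learn or refresh a forwarding-table entry for (smac, vlan).  On a
+ * hit the entry is refreshed; if the source moved to a different GID
+ * it is re-linked to the new path under priv->lock.  On a miss a new
+ * entry is allocated from the slab cache and added to its hash bucket.
+ */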
+void xve_fwt_insert(struct xve_dev_priv *priv, struct xve_cm_ctx *ctx,
+                   union ib_gid *gid, u32 qpn, char *smac, u16 vlan)
+{
+       struct hlist_head *head;
+       struct xve_fwt_entry *fwt_entry;
+       unsigned long flags, flags1;
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       struct xve_path *path;
+       char from[64], to[64];
+
+       fwt_entry = xve_fwt_lookup(xve_fwt, smac, vlan, 1);
+       if (fwt_entry) {
+               if (unlikely
+                   (memcmp
+                    (fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid)))) {
+                       print_mgid_buf(from, (char *)fwt_entry->dgid.raw);
+                       print_mgid_buf(to, (char *)gid->raw);
+                       pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x vlan %d moved from GID %s to GID %s\n",
+                               priv->xve_name, ALIGN_TO_FF(smac[0]),
+                               ALIGN_TO_FF(smac[1]), ALIGN_TO_FF(smac[2]),
+                               ALIGN_TO_FF(smac[3]), ALIGN_TO_FF(smac[4]),
+                               ALIGN_TO_FF(smac[5]),
+                               fwt_entry->vlan, from, to);
+
+                       priv->counters[XVE_MAC_MOVED_COUNTER]++;
+
+                       memcpy(fwt_entry->dgid.raw, gid->raw,
+                              sizeof(union ib_gid));
+                       /*
+                        * We need to hold priv->lock
+                        */
+                       spin_lock_irqsave(&priv->lock, flags);
+                       spin_lock_irqsave(&xve_fwt->lock, flags1);
+                       if (fwt_entry->path)
+                               list_del(&fwt_entry->list);
+                       fwt_entry->path = NULL;
+                       path = __path_find(priv->netdev, gid->raw);
+                       if (path) {
+                               fwt_entry->path = path;
+                               list_add_tail(&fwt_entry->list,
+                                             &path->fwt_list);
+                       }
+                       spin_unlock_irqrestore(&xve_fwt->lock, flags1);
+                       spin_unlock_irqrestore(&priv->lock, flags);
+               }
+               if (qpn && unlikely(fwt_entry->dqpn != qpn))
+                       fwt_entry->dqpn = qpn;
+               /* Insert CM rx in the path */
+               if (fwt_entry->path && ctx)
+                       fwt_entry->path->cm_ctx_rx = ctx;
+               xve_fwt_put_ctx(xve_fwt, fwt_entry);
+       } else {
+               /* __GFP_ZERO already gives us a zeroed entry */
+               fwt_entry =
+                   kmem_cache_alloc(xve_fwt_cache, GFP_ATOMIC | __GFP_ZERO);
+               if (!fwt_entry) {
+                       pr_warn("XVE: fwt entry allocation failed\n");
+                       return;
+               }
+               print_mgid_buf(from, (char *)gid->raw);
+               pr_info("XVE: %s MAC %02x:%02x:%02x:%02x:%02x:%02x vlan %d learned from GID %s, mode: %s Fwt %p\n",
+                       priv->xve_name, ALIGN_TO_FF(smac[0]),
+                       ALIGN_TO_FF(smac[1]),
+                       ALIGN_TO_FF(smac[2]), ALIGN_TO_FF(smac[3]),
+                       ALIGN_TO_FF(smac[4]), ALIGN_TO_FF(smac[5]),
+                       vlan, from, qpn ? "UD" : "RC", fwt_entry);
+               priv->counters[XVE_MAC_LEARN_COUNTER]++;
+               memcpy(fwt_entry->dgid.raw, gid->raw, sizeof(union ib_gid));
+               fwt_entry->dqpn = qpn;
+               ether_addr_copy(fwt_entry->smac_addr, smac);
+               fwt_entry->vlan = vlan;
+               set_bit(XVE_FWT_ENTRY_REFRESH, &fwt_entry->state);
+               fwt_entry->last_refresh = jiffies;
+               set_bit(XVE_FWT_ENTRY_VALID, &fwt_entry->state);
+               spin_lock_irqsave(&xve_fwt->lock, flags);
+               fwt_entry->hash_value =
+                   xve_mac_hash(smac, XVE_FWT_HASH_LISTS, vlan);
+               head =
+                   &xve_fwt->fwt[xve_mac_hash(smac, XVE_FWT_HASH_LISTS, vlan)];
+               hlist_add_head(&fwt_entry->hlist, head);
+               xve_fwt->num++;
+               spin_unlock_irqrestore(&xve_fwt->lock, flags);
+       }
+}
+
+void xve_remove_fwt_entry(struct xve_dev_priv *priv,
+                         struct xve_fwt_entry *fwt_entry)
+{
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       xve_debug(DEBUG_TABLE_INFO, priv, "%s Deleting FWT From list %p\n",
+                 __func__, fwt_entry);
+       if (fwt_entry->path)
+               list_del(&fwt_entry->list);
+       hlist_del(&fwt_entry->hlist);
+       xve_fwt->num--;
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+}
+
+void xve_fwt_entry_free(struct xve_dev_priv *priv,
+                       struct xve_fwt_entry *fwt_entry)
+{
+       unsigned long begin;
+       /*
+        * Wait for the reference count to drop to zero (a kref would be
+        * a better fit here)
+        */
+       begin = jiffies;
+
+       while (atomic_read(&fwt_entry->ref_cnt)) {
+               xve_debug(DEBUG_TABLE_INFO, priv,
+                         "%s Waiting for ref cnt to become zero %p\n",
+                         __func__, fwt_entry);
+               if (time_after(jiffies, begin + 5 * HZ)) {
+                       xve_warn(priv,
+                                "timing out fwt_entry still in use %p\n",
+                                fwt_entry);
+                       break;
+               }
+               msleep(20);
+       }
+       kmem_cache_free(xve_fwt_cache, fwt_entry);
+}
+
+void xve_fwt_entry_destroy(struct xve_dev_priv *priv,
+                          struct xve_fwt_entry *fwt_entry)
+{
+       xve_remove_fwt_entry(priv, fwt_entry);
+       xve_fwt_entry_free(priv, fwt_entry);
+}
+
+void xve_fwt_cleanup(struct xve_dev_priv *priv)
+{
+       struct xve_fwt_s *xve_fwt = &priv->xve_fwt;
+       int i;
+       struct hlist_head *head;
+       struct hlist_node *n;
+       struct xve_fwt_entry *fwt_entry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xve_fwt->lock, flags);
+       for (i = 0; i < XVE_FWT_HASH_LISTS; i++) {
+               head = &xve_fwt->fwt[i];
+               hlist_for_each_entry_safe(fwt_entry, n, head, hlist) {
+                       hlist_del(&fwt_entry->hlist);
+                       kmem_cache_free(xve_fwt_cache, fwt_entry);
+                       xve_fwt->num--;
+               }
+       }
+       pr_info("XVE: %s Forwarding table cleaned up for %s",
+               __func__, priv->xve_name);
+       pr_info("number of entries %d\n", xve_fwt->num);
+       spin_unlock_irqrestore(&xve_fwt->lock, flags);
+}
+
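+/*
+ * Fix up a received skb before handing it to the network stack: set
+ * the protocol and receiving device, mark the checksum as already
+ * verified when the device does checksum offload, and account
+ * truesize.
+ */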
+void xve_prepare_skb(struct xve_dev_priv *priv, struct sk_buff *skb)
+{
+       skb->protocol = eth_type_trans(skb, priv->netdev);
+       skb->dev = priv->netdev;
+       skb_pkt_type(skb, PACKET_HOST);
+       if (test_bit(XVE_FLAG_CSUM, &priv->flags))
+               skb->ip_summed = CHECKSUM_UNNECESSARY;
+       skb->truesize = skb->len + sizeof(struct sk_buff);
+}
+
+void xve_tables_exit(void)
+{
+       kmem_cache_destroy(xve_fwt_cache);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c b/drivers/infiniband/ulp/xsigo/xve/xve_verbs.c
new file mode 100644 (file)
index 0000000..ad9d6be
--- /dev/null
@@ -0,0 +1,324 @@
+/*
+ * Copyright (c) 2011-2012 Xsigo Systems. All rights reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "xve.h"
+#include "xve_compat.h"
+
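+/*
+ * Attach the UD QP to a multicast group: resolve the pkey index for
+ * the port, optionally (re)program the qkey on the QP, then issue
+ * ib_attach_mcast() for the group's MGID/MLID.
+ */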
+int xve_mcast_attach(struct net_device *dev, u16 mlid, union ib_gid *mgid,
+                    int set_qkey)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_attr *qp_attr = NULL;
+       int ret;
+       u16 pkey_index;
+
+       if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) {
+               clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+               ret = -ENXIO;
+               goto out;
+       }
+       set_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+
+       if (set_qkey) {
+               ret = -ENOMEM;
+               qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
+               if (!qp_attr)
+                       goto out;
+
+               /* set correct QKey for QP */
+               qp_attr->qkey = priv->qkey;
+               ret = ib_modify_qp(priv->qp, qp_attr, IB_QP_QKEY);
+               if (ret) {
+                       xve_warn(priv, "failed to modify QP, ret = %d\n", ret);
+                       goto out;
+               }
+       }
+
+       /* attach QP to multicast group */
+       ret = ib_attach_mcast(priv->qp, mgid, mlid);
+       if (ret)
+               xve_warn(priv,
+                        "failed to attach to multicast group, ret = %d\n",
+                        ret);
+
+out:
+       kfree(qp_attr);
+       return ret;
+}
+
+int xve_init_qp(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret;
+       struct ib_qp_attr qp_attr;
+       int attr_mask;
+
+       if (!test_bit(XVE_PKEY_ASSIGNED, &priv->flags))
+               return -1;
+
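+       /*
+        * Walk the UD QP through the standard INIT -> RTR -> RTS state
+        * machine; each ib_modify_qp() call passes only the attribute
+        * bits that are legal for that transition.
+        */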
+       qp_attr.qp_state = IB_QPS_INIT;
+       qp_attr.qkey = 0;
+       qp_attr.port_num = priv->port;
+       qp_attr.pkey_index = priv->pkey_index;
+       attr_mask = IB_QP_QKEY | IB_QP_PORT | IB_QP_PKEY_INDEX | IB_QP_STATE;
+       ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to init, ret = %d\n", ret);
+               goto out_fail;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTR;
+       /* Can't set this in a INIT->RTR transition */
+       attr_mask &= ~IB_QP_PORT;
+       ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTR, ret = %d\n", ret);
+               goto out_fail;
+       }
+
+       qp_attr.qp_state = IB_QPS_RTS;
+       qp_attr.sq_psn = 0;
+       attr_mask |= IB_QP_SQ_PSN;
+       attr_mask &= ~IB_QP_PKEY_INDEX;
+       ret = ib_modify_qp(priv->qp, &qp_attr, attr_mask);
+       if (ret) {
+               xve_warn(priv, "failed to modify QP to RTS, ret = %d\n", ret);
+               goto out_fail;
+       }
+
+       return 0;
+
+out_fail:
+       qp_attr.qp_state = IB_QPS_RESET;
+       if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+               xve_warn(priv, "Failed to modify QP to RESET state\n");
+
+       return ret;
+}
+
+int xve_transport_dev_init(struct net_device *dev, struct ib_device *ca)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       struct ib_qp_init_attr init_attr = {
+               .cap = {
+                       .max_send_wr = xve_sendq_size,
+                       .max_recv_wr = xve_recvq_size,
+                       .max_send_sge = 1,
+                       .max_recv_sge = XVE_UD_RX_SG},
+               .sq_sig_type = IB_SIGNAL_ALL_WR,
+               .qp_type = IB_QPT_UD
+       };
+
+       int ret, size;
+       int i;
+       struct ethtool_coalesce *coal;
+
+       priv->pd = ib_alloc_pd(priv->ca);
+       if (IS_ERR(priv->pd)) {
+               pr_warn("%s: failed to allocate PD for %s\n",
+                       ca->name, priv->xve_name);
+               return -ENODEV;
+       }
+
+       priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
+       if (IS_ERR(priv->mr)) {
+               pr_warn("%s: ib_get_dma_mr failed\n", ca->name);
+               goto out_free_pd;
+       }
+
+       size = xve_recvq_size + 1;
+       ret = xve_cm_dev_init(dev);
+       if (ret != 0) {
+               pr_err("%s Failed for %s [ret %d ]\n", __func__,
+                      priv->xve_name, ret);
+               goto out_free_mr;
+       }
+       size += xve_sendq_size;
+       size += xve_recvq_size + 1;     /* 1 extra for rx_drain_qp */
+
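+       /*
+        * The receive CQ is shared: it is sized for the UD receive ring,
+        * the connected-mode queues set up by xve_cm_dev_init(), and one
+        * extra slot for the rx drain QP, hence the summed size computed
+        * above.
+        */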
+       priv->recv_cq =
+           ib_create_cq(priv->ca, xve_ib_completion, NULL, dev, size, 0);
+       if (IS_ERR(priv->recv_cq)) {
+               pr_warn("%s: failed to create receive CQ for %s\n",
+                       ca->name, priv->xve_name);
+               goto out_free_mr;
+       }
+
+       priv->send_cq = ib_create_cq(priv->ca, xve_send_comp_handler, NULL,
+                                    dev, xve_sendq_size, 0);
+       if (IS_ERR(priv->send_cq)) {
+               pr_warn("%s: failed to create send CQ for %s\n",
+                       ca->name, priv->xve_name);
+               goto out_free_recv_cq;
+       }
+
+       if (ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP))
+               goto out_free_send_cq;
+
+       coal = kzalloc(sizeof(*coal), GFP_KERNEL);
+       if (coal) {
+               coal->rx_coalesce_usecs = 10;
+               coal->tx_coalesce_usecs = 10;
+               coal->rx_max_coalesced_frames = 16;
+               coal->tx_max_coalesced_frames = 16;
+               dev->ethtool_ops->set_coalesce(dev, coal);
+               kfree(coal);
+       }
+
+       init_attr.send_cq = priv->send_cq;
+       init_attr.recv_cq = priv->recv_cq;
+
+       if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
+               init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
+
+       if (dev->features & NETIF_F_SG)
+               init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+
+       priv->qp = ib_create_qp(priv->pd, &init_attr);
+       if (IS_ERR(priv->qp)) {
+               pr_warn("%s: failed to create QP\n", ca->name);
+               goto out_free_send_cq;
+       }
+
+       for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+               priv->tx_sge[i].lkey = priv->mr->lkey;
+
+       priv->tx_wr.opcode = IB_WR_SEND;
+       priv->tx_wr.sg_list = priv->tx_sge;
+       priv->tx_wr.send_flags = IB_SEND_SIGNALED;
+
+       priv->rx_sge[0].lkey = priv->mr->lkey;
+       if (xve_ud_need_sg(priv->max_ib_mtu)) {
+               priv->rx_sge[0].length = XVE_UD_HEAD_SIZE;
+               priv->rx_sge[1].length = PAGE_SIZE;
+               priv->rx_sge[1].lkey = priv->mr->lkey;
+               priv->rx_wr.num_sge = XVE_UD_RX_SG;
+       } else {
+               priv->rx_sge[0].length = XVE_UD_BUF_SIZE(priv->max_ib_mtu);
+               priv->rx_wr.num_sge = 1;
+       }
+       priv->rx_wr.next = NULL;
+       priv->rx_wr.sg_list = priv->rx_sge;
+
+       return 0;
+
+out_free_send_cq:
+       ib_destroy_cq(priv->send_cq);
+
+out_free_recv_cq:
+       ib_destroy_cq(priv->recv_cq);
+
+out_free_mr:
+       ib_dereg_mr(priv->mr);
+       xve_cm_dev_cleanup(dev);
+
+out_free_pd:
+       ib_dealloc_pd(priv->pd);
+       return -ENODEV;
+}
+
+void xve_transport_dev_cleanup(struct net_device *dev)
+{
+       struct xve_dev_priv *priv = netdev_priv(dev);
+       int ret = 0;
+
+       if (priv->qp) {
+               if (ib_destroy_qp(priv->qp))
+                       xve_warn(priv, "ib_destroy_qp failed\n");
+               priv->qp = NULL;
+               clear_bit(XVE_PKEY_ASSIGNED, &priv->flags);
+       }
+       ret = ib_destroy_cq(priv->send_cq);
+       if (ret)
+               xve_warn(priv, "%s ib_destroy_cq (sendq) failed ret=%d\n",
+                        __func__, ret);
+
+       ret = ib_destroy_cq(priv->recv_cq);
+       if (ret)
+               xve_warn(priv, "%s ib_destroy_cq failed ret=%d\n",
+                        __func__, ret);
+
+       xve_cm_dev_cleanup(dev);
+
+       ret = ib_dereg_mr(priv->mr);
+       if (ret)
+               xve_warn(priv, "%s ib_dereg_mr failed ret=%d\n", __func__, ret);
+
+       ret = ib_dealloc_pd(priv->pd);
+       if (ret)
+               xve_warn(priv, "%s ib_dealloc_pd failed ret=%d\n",
+                        __func__, ret);
+}
+
+void xve_event(struct ib_event_handler *handler, struct ib_event *record)
+{
+       struct xve_dev_priv *priv =
+           container_of(handler, struct xve_dev_priv, event_handler);
+
+       if (record->element.port_num != priv->port)
+               return;
+
+       xve_debug(DEBUG_MCAST_INFO, priv, "Event %d on device %s port %d\n",
+                 record->event, record->device->name,
+                 record->element.port_num);
+
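+       /*
+        * Map IB port events to flush severities: SM change and client
+        * re-register take the light flush path, port/LID state changes
+        * take the normal path, and a P_Key change forces a heavy flush.
+        */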
+       switch (record->event) {
+       case IB_EVENT_SM_CHANGE:
+               priv->counters[XVE_SM_CHANGE_COUNTER]++;
+               xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+               break;
+       case IB_EVENT_CLIENT_REREGISTER:
+               priv->counters[XVE_CLIENT_REREGISTER_COUNTER]++;
+               set_bit(XVE_FLAG_DONT_DETACH_MCAST, &priv->flags);
+               xve_queue_work(priv, XVE_WQ_START_FLUSHLIGHT);
+               break;
+       case IB_EVENT_PORT_ERR:
+               priv->counters[XVE_EVENT_PORT_ERR_COUNTER]++;
+               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+               break;
+       case IB_EVENT_PORT_ACTIVE:
+               priv->counters[XVE_EVENT_PORT_ACTIVE_COUNTER]++;
+               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+               break;
+       case IB_EVENT_LID_CHANGE:
+               priv->counters[XVE_EVENT_LID_CHANGE_COUNTER]++;
+               xve_queue_work(priv, XVE_WQ_START_FLUSHNORMAL);
+               break;
+       case IB_EVENT_PKEY_CHANGE:
+               priv->counters[XVE_EVENT_PKEY_CHANGE_COUNTER]++;
+               xve_queue_work(priv, XVE_WQ_START_FLUSHHEAVY);
+               break;
+       default:
+               priv->counters[XVE_INVALID_EVENT_COUNTER]++;
+               break;
+       }
+}
diff --git a/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h b/drivers/infiniband/ulp/xsigo/xve/xve_xsmp_msgs.h
new file mode 100644 (file)
index 0000000..43a516e
--- /dev/null
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2006-2012 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XVE_XSMP_MSGS_H__
+#define __XVE_XSMP_MSGS_H__
+
+#define XVE_MAX_NAME_SIZE              16
+#define XVE_MAX_PROC_NAME_SIZE          32
+#define CHASSIS_MAX_NAME_SIZE           32
+#define SESSION_MAX_NAME_SIZE           32
+#define XVE_MAX_HOST_NAME              32
+#define XVE_MP_GROUP_NAME_MAX          (XVE_MAX_NAME_SIZE + XVE_MAX_HOST_NAME)
+
+enum xve_xsmp_cmd_type {
+       XSMP_XVE_INVALID,
+       XSMP_XVE_INSTALL,
+       XSMP_XVE_DELETE,
+       XSMP_XVE_UPDATE,
+       XSMP_XVE_ADMIN_UP,
+       XSMP_XVE_ADMIN_DOWN,
+       XSMP_XVE_OPER_UP,
+       XSMP_XVE_OPER_DOWN,
+       XSMP_XVE_OPER_READY,
+       XSMP_XVE_VLANIP,        /* VLAN and IP address */
+       XSMP_XVE_STATS,         /* XVE driver statistics */
+       XSMP_XVE_SYNC_BEGIN,
+       XSMP_XVE_SYNC_END,
+       XSMP_XVE_INFO_REQUEST,  /* request vnic info  */
+       XSMP_XVE_OPER_FAILED,
+       XSMP_XVE_OPER_REQ,
+       XSMP_XVE_HA_INFO,
+       XSMP_XVE_ISCSI_INFO,
+
+       XSMP_XVE_TYPE_MAX,
+};
+
+/* XVE specific messages */
+
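+/*
+ * Each XSMP message below is padded to a fixed wire size (512 bytes
+ * here, 960 for the iSCSI message) by overlaying the packed payload
+ * on a byte array, so the on-wire length does not depend on which
+ * fields a given release uses.
+ */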
+struct xve_xsmp_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bitmask;
+                       u64 resource_id;
+                       u64 tca_guid;
+                       u16 tca_lid;
+                       u16 mac_high;
+                       u32 mac_low;
+                       u16 vn_admin_rate;
+                       u16 admin_state;
+                       u16 encap;
+                       u16 vn_mtu;
+                       u32 install_flag;
+                       u8 xve_name[XVE_MAX_NAME_SIZE];
+                       u16 service_level;      /* SL value for this vnic */
+                       u16 fc_active;  /* 1: enable, 0:
+                                        * disable host rate control */
+                       u16 cir;        /* committed rate in mbps */
+                       u16 pir;        /* peak rate in mbps */
+                       u32 cbs;        /* committed burst size in bytes */
+                       u32 pbs;        /* peak burst size in bytes */
+                       u8 vm_index;    /* the index used by vmware
+                                        * for persistence */
+                       u8 _reserved;
+                       u16 mp_flag;
+                       u8 mp_group[XVE_MP_GROUP_NAME_MAX];
+                       u8 la_flag;     /* linkAggregation flag */
+                       u8 la_policy;
+                       /* for virtual network */
+                       u32 net_id;
+                       u8 vnet_mode;
+               } __packed;
+               u8 bytes[512];
+       };
+} __packed;
+
+/* The reason code for NACKing an install  */
+#define XVE_NACK_DUP_NAME      1       /* duplicate name */
+#define XVE_NACK_DUP_VID       2       /* duplicate VID */
+#define XVE_NACK_LIMIT_REACHED 3       /* Max number of XVEs reached */
+#define XVE_NACK_ALLOCATION_ERROR      4       /* Error during instantiation */
+#define XVE_NACK_CODE_MAX      5
+
+/* The common XVE XSMP header for all messages */
+struct xve_xsmp_header {
+       u8 type;
+       u8 code;
+       u16 length;
+       u32 bitmask;
+       u64 resource_id;
+};
+
+/* Maximum number of dwords in an IP address (v4 or v6) */
+#define MAX_IP_ADDR_DWORDS     4
+
+/* IP address type */
+enum xve_ipaddr_type {
+       XVE_ADDR_TYPE_IPV4 = 1,
+       XVE_ADDR_TYPE_IPV6,
+};
+
+/* Bitmask values for add/delete VLAN notifications */
+#define XVE_ADD_VLAN_NOTIFY            (1 << 0)
+#define XVE_DELETE_VLAN_NOTIFY (1 << 1)
+
+/* Denotes an instance of a VLANID and IP address pair */
+struct xve_xsmp_vlanip_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bitmask;
+                       u64 resource_id;
+                       u8 ip_type;
+                       u8 _reserved1;
+                       u16 _reserved2;
+                       u32 vlanid;
+                       u32 ipaddress[MAX_IP_ADDR_DWORDS];
+                       u32 netmask[MAX_IP_ADDR_DWORDS];
+                       /*
+                        * This does not come from chassis but locally generated
+                        */
+                       char ifname[XVE_MAX_NAME_SIZE];
+                       u16 mp_flag;
+               } __packed;
+               u8 bytes[512];
+       };
+};
+
+struct xve_xsmp_stats_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 bitmask;
+                       u64 resource_id;
+                       u32 counter[16];
+                       /*XVE IO STATS */
+                       u64 stats_handle;
+                       u64 rx_packets;
+                       u64 rx_bytes;
+                       u64 rx_errors;
+                       u64 rx_drops;
+                       u64 rx_overruns;
+                       u64 tx_packets;
+                       u64 tx_bytes;
+                       u64 tx_errors;
+                       u64 tx_drops;
+               } __packed;
+               u8 bytes[512];
+       };
+};
+
+struct xve_ha_info_msg {
+       union {
+               struct {
+                       u8 type;
+                       u8 code;
+                       u16 length;
+                       u32 reserved;
+                       u64 resource_id;        /* vid */
+                       u8 ha_state;
+                       u8 name[XVE_MAX_NAME_SIZE];
+               } __packed;
+               u8 bytes[512];
+       };
+} __packed;
+
+#define ISCSI_MOUNT_DEV_NAME_LEN    100
+#define MAX_DOMAIN_NAME_LEN 64
+
+#define SAN_MOUNT_TYPE_STATIC 1
+#define SAN_MOUNT_TYPE_LVM    2
+#define SAN_MOUNT_TYPE_DIRECT 3
+
+struct xve_iscsi_info {
+       uint64_t vid;
+       uint8_t role;
+       uint16_t vlan_id;
+       uint8_t ip_type;
+       uint32_t ip_addr;
+       uint32_t netmask;
+       uint64_t mac;
+       char xve_name[XVE_MAX_NAME_SIZE];
+       uint32_t gateway_ip_address;
+       uint32_t dns_ip_address;
+       char domain_name[MAX_DOMAIN_NAME_LEN];
+       uint16_t protocol;
+       uint16_t port;
+       uint16_t lun;
+       uint32_t target_ip_address;
+       char target_iqn[ISCSI_MOUNT_DEV_NAME_LEN];      /* Target Name */
+       char target_portal_group[ISCSI_MOUNT_DEV_NAME_LEN];
+       char initiator_iqn[ISCSI_MOUNT_DEV_NAME_LEN];
+
+       uint16_t mount_type;
+       char mount_dev[ISCSI_MOUNT_DEV_NAME_LEN];
+       char mount_options[ISCSI_MOUNT_DEV_NAME_LEN];
+       char vol_group[ISCSI_MOUNT_DEV_NAME_LEN];
+       char vol_group_name[ISCSI_MOUNT_DEV_NAME_LEN];
+} __packed;
+
+struct xve_iscsi_msg {
+       union {
+               struct {
+                       uint8_t type;
+                       uint8_t code;
+                       uint16_t length;
+                       struct xve_iscsi_info iscsi_info;
+               } __packed;
+               uint8_t bytes[960];
+       };
+} __packed;
+
+/* Values for the bitmask of the install/delete/update message*/
+#define XVE_UPDATE_MAC         (1 << 0)
+#define XVE_UPDATE_BANDWIDTH           (1 << 1)
+#define XVE_UPDATE_MTU         (1 << 2)
+#define XVE_UPDATE_TCA_INFO            (1 << 3)
+#define XVE_UPDATE_SL          (1 << 4)
+#define XVE_UPDATE_ENCAP               (1 << 5)
+#define XVE_UPDATE_ADMIN_STATE (1 << 6)
+#define XVE_UPDATE_QOS         (1 << 7)
+#define XVE_UPDATE_ACL         (1 << 8)
+#define XVE_UPDATE_MP_FLAG             (1 << 10)
+#define XVE_XT_STATE_DOWN              (1 << 30)
+#define XVE_UPDATE_XT_CHANGE           (1 << 31)
+
+/* mp_flag */
+#define MP_XVE_PRIMARY         (1 << 0)
+#define MP_XVE_SECONDARY       (1 << 1)
+#define MP_XVE_AUTO_SWITCH     (1 << 2)
+
+/* ha_state */
+#define XVE_HA_STATE_UNKNOWN   0
+#define XVE_HA_STATE_ACTIVE    1
+#define XVE_HA_STATE_STANDBY   2
+
+/* Ack and Nack sent out in the 'code' field */
+#define        XSMP_XVE_ACK            (1 << 6)
+#define        XSMP_XVE_NACK           (1 << 7)
+
+/* Bits for the promiscuous flag field */
+#define XVE_MCAST              (1 << 0)
+
+/* Defines for the install flag */
+#define XVE_INSTALL_TCP_OFFL   (1 << 0)
+#define XVE_INSTALL_UDP_OFFL   (1 << 1)
+#define XVE_INSTALL_TSO        (1 << 3)
+#define XVE_INSTALL_RX_BAT     (1 << 4)
+#define XVE_8K_IBMTU           (1 << 5)
+#define        XVE_INSTALL_LINK2QP     (1 << 8)
+
+#define XSIGO_IP_FRAGMENT_BIT       (1 << 8)
+#define XSIGO_IPV4_BIT              (1 << 6)
+#define XSIGO_TCP_CHKSUM_GOOD_BIT   (1 << 3)
+#define XSIGO_UDP_CHKSUM_GOOD_BIT   (1 << 1)
+
+#endif /* __XVE_XSMP_MSGS_H__ */