www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net: Introduce generic failover module
author Sridhar Samudrala <sridhar.samudrala@intel.com>
Thu, 24 May 2018 16:55:13 +0000 (09:55 -0700)
committer Brian Maly <brian.maly@oracle.com>
Tue, 20 Nov 2018 22:12:27 +0000 (17:12 -0500)
The failover module provides a generic interface for paravirtual drivers
to register a netdev and a set of ops with a failover instance. The ops
are used as event handlers that get called to handle netdev register/
unregister/link change/name change events on slave pci ethernet devices
with the same mac address as the failover netdev.

This enables paravirtual drivers to use a VF as an accelerated low latency
datapath. It also allows migration of VMs with direct attached VFs by
failing over to the paravirtual datapath when the VF is unplugged.

Signed-off-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 30c8bd5aa8b2c78546c3e52337101b9c85879320)
Orabug: 28122104
Signed-off-by: Vijay Balakrishna <vijay.balakrishna@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Made a change to resolve build error as #arguments differs in UEK5
for routine netdev_master_upper_dev_link().

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
include/linux/netdevice.h
(enum net_device_priv_flags list in UEK5 is a subset of upstream list,
merged conflicts manually)

net/Kconfig
(a config absent in UEK5, cherry-pick failed to resolve)

(cherry picked from commit 2c6fa9893c5d8b5e71103af40986d3fd952a28d7)
Signed-off-by: Vijay Balakrishna <vijay.balakrishna@oracle.com>
Reviewed-by: Si-Wei Liu <si-wei.liu@oracle.com>
Reviewed-by: Shannon Nelson <shannon.nelson@oracle.com>
Conflicts:
MAINTAINERS
(the surrounding list where new MAINTAINER for failover added by cherry-pick
differs between UEK5 and UEK4)
include/linux/netdevice.h
(there is additional code only in UEK5 which cherry-pick inserted into UEK4,
only retained relevant code)
net/core/Makefile
(there are additional objects only in UEK5 which cherry-pick inserted into UEK4
makefile, only relevant object retained)

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Documentation/networking/failover.rst [new file with mode: 0644]
MAINTAINERS
include/linux/netdevice.h
include/net/failover.h [new file with mode: 0644]
net/Kconfig
net/core/Makefile
net/core/failover.c [new file with mode: 0644]

diff --git a/Documentation/networking/failover.rst b/Documentation/networking/failover.rst
new file mode 100644 (file)
index 0000000..f0c8483
--- /dev/null
@@ -0,0 +1,18 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========
+FAILOVER
+========
+
+Overview
+========
+
+The failover module provides a generic interface for paravirtual drivers
+to register a netdev and a set of ops with a failover instance. The ops
+are used as event handlers that get called to handle netdev register/
+unregister/link change/name change events on slave pci ethernet devices
+with the same mac address as the failover netdev.
+
+This enables paravirtual drivers to use a VF as an accelerated low latency
+datapath. It also allows live migration of VMs with direct attached VFs by
+failing over to the paravirtual datapath when the VF is unplugged.
index e60b7ce1f035e06f0221710b4d8a6ed37bb50a1f..f88ef98b11f9ef0b0a70d77a15dede57878fba64 100644 (file)
@@ -3988,6 +3988,14 @@ T:       git git://linuxtv.org/anttip/media_tree.git
 S:     Maintained
 F:     drivers/media/tuners/fc2580*
 
+FAILOVER MODULE
+M:     Sridhar Samudrala <sridhar.samudrala@intel.com>
+L:     netdev@vger.kernel.org
+S:     Supported
+F:     net/core/failover.c
+F:     include/net/failover.h
+F:     Documentation/networking/failover.rst
+
 FANOTIFY
 M:     Eric Paris <eparis@redhat.com>
 S:     Maintained
index dddf136fff165b446fec1046e4efd59a44af4fa0..7eaee17f54b710f1e88be0b843e215d0fa39e294 100644 (file)
@@ -1248,6 +1248,8 @@ struct net_device_ops {
  * @IFF_MACVLAN: Macvlan device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_MACSEC: device is a MACsec device
+ * @IFF_FAILOVER: device is a failover master device
+ * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device
  */
 enum netdev_priv_flags {
        IFF_802_1Q_VLAN                 = 1<<0,
@@ -1277,6 +1279,8 @@ enum netdev_priv_flags {
        IFF_IPVLAN_SLAVE                = 1<<24,
        IFF_NO_QUEUE                    = 1<<26,
        IFF_MACSEC                      = 1<<27,
+       IFF_FAILOVER                    = 1<<28,
+       IFF_FAILOVER_SLAVE              = 1<<29,
 };
 
 #define IFF_802_1Q_VLAN                        IFF_802_1Q_VLAN
@@ -1306,6 +1310,8 @@ enum netdev_priv_flags {
 #define IFF_IPVLAN_SLAVE               IFF_IPVLAN_SLAVE
 #define IFF_NO_QUEUE                   IFF_NO_QUEUE
 #define IFF_MACSEC                     IFF_MACSEC
+#define IFF_FAILOVER                   IFF_FAILOVER
+#define IFF_FAILOVER_SLAVE             IFF_FAILOVER_SLAVE
 
 /**
  *     struct net_device - The DEVICE structure.
@@ -3886,6 +3892,16 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
        return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
+/* True when @dev has the IFF_FAILOVER private flag set. */
+static inline bool netif_is_failover(const struct net_device *dev)
+{
+       return !!(dev->priv_flags & IFF_FAILOVER);
+}
+
+/* True when @dev has the IFF_FAILOVER_SLAVE private flag set. */
+static inline bool netif_is_failover_slave(const struct net_device *dev)
+{
+       return !!(dev->priv_flags & IFF_FAILOVER_SLAVE);
+}
+
 /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
 static inline void netif_keep_dst(struct net_device *dev)
 {
diff --git a/include/net/failover.h b/include/net/failover.h
new file mode 100644 (file)
index 0000000..bb15438
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018, Intel Corporation. */
+
+#ifndef _FAILOVER_H
+#define _FAILOVER_H
+
+#include <linux/netdevice.h>
+
+/**
+ * struct failover_ops - event handlers supplied by a failover driver
+ *
+ * @slave_pre_register: called from failover_slave_register() before the rx
+ *     handler is installed on the slave; a non-zero return aborts the
+ *     registration.
+ * @slave_register: called after the slave has been linked to the failover
+ *     netdev and flagged IFF_FAILOVER_SLAVE; a zero return completes the
+ *     registration, anything else makes the core undo the link and the
+ *     rx handler.
+ * @slave_pre_unregister: called from failover_slave_unregister() before
+ *     anything is torn down; a non-zero return aborts the unregistration.
+ * @slave_unregister: called after the rx handler and the upper link have
+ *     been removed and IFF_FAILOVER_SLAVE has been cleared.
+ * @slave_link_change: called for NETDEV_UP, NETDEV_DOWN and NETDEV_CHANGE
+ *     events on a failover slave, only while the failover netdev is
+ *     running.
+ * @slave_name_change: called for NETDEV_CHANGENAME events on a failover
+ *     slave, only while the failover netdev is running.
+ * @slave_handle_frame: installed on the slave through
+ *     netdev_rx_handler_register() when the slave is registered.
+ *
+ * The core checks every int-returning op for NULL before calling it.
+ * @slave_handle_frame is passed to netdev_rx_handler_register() unchecked.
+ */
+struct failover_ops {
+       int (*slave_pre_register)(struct net_device *slave_dev,
+                                 struct net_device *failover_dev);
+       int (*slave_register)(struct net_device *slave_dev,
+                             struct net_device *failover_dev);
+       int (*slave_pre_unregister)(struct net_device *slave_dev,
+                                   struct net_device *failover_dev);
+       int (*slave_unregister)(struct net_device *slave_dev,
+                               struct net_device *failover_dev);
+       int (*slave_link_change)(struct net_device *slave_dev,
+                                struct net_device *failover_dev);
+       int (*slave_name_change)(struct net_device *slave_dev,
+                                struct net_device *failover_dev);
+       rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb);
+};
+
+/**
+ * struct failover - one registered failover instance
+ *
+ * @list: linkage in the global list of failover instances kept by
+ *     net/core/failover.c
+ * @failover_dev: the failover (master) netdev; failover_register() takes a
+ *     reference on it with dev_hold() and failover_unregister() drops it
+ * @ops: event handlers supplied by the driver at registration time
+ */
+struct failover {
+       struct list_head list;
+       struct net_device __rcu *failover_dev;
+       struct failover_ops __rcu *ops;
+};
+
+/* Create a failover instance for @dev; returns the instance or an ERR_PTR(). */
+struct failover *failover_register(struct net_device *dev,
+                                  struct failover_ops *ops);
+/* Tear down and free an instance returned by failover_register(). */
+void failover_unregister(struct failover *failover);
+/* Detach @slave_dev from its failover instance; returns a NOTIFY_* code. */
+int failover_slave_unregister(struct net_device *slave_dev);
+
+#endif /* _FAILOVER_H */
index 44dd5786ee91da16ae920d3c9f62b1f4bac353c8..44385a5e5fd569569cbd67cfc21d614961f211da 100644 (file)
@@ -372,6 +372,19 @@ source "net/ceph/Kconfig"
 source "net/nfc/Kconfig"
 
 
+config FAILOVER
+       tristate "Generic failover module"
+       help
+         The failover module provides a generic interface for paravirtual
+         drivers to register a netdev and a set of ops with a failover
+         instance. The ops are used as event handlers that get called to
+         handle netdev register/unregister/link change/name change events
+         on slave pci ethernet devices with the same mac address as the
+         failover netdev. This enables paravirtual drivers to use a
+         VF as an accelerated low latency datapath. It also allows live
+         migration of VMs with direct attached VFs by failing over to the
+         paravirtual datapath when the VF is unplugged.
+
 endif   # if NET
 
 # Used by archs to tell that they support BPF_JIT
index fec0856dd6c031a2ae369410fc5d7f9c25a1fcf6..fee3bdbd44ecabc98e74d35b7cd5e285f10e776a 100644 (file)
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
 obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
 obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_FAILOVER) += failover.o
diff --git a/net/core/failover.c b/net/core/failover.c
new file mode 100644 (file)
index 0000000..b3e14ab
--- /dev/null
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* A common module to handle registrations and notifications for paravirtual
+ * drivers to enable accelerated datapath and support VF live migration.
+ *
+ * The notifier and event handling code is based on netvsc driver.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <uapi/linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <net/failover.h>
+
+static LIST_HEAD(failover_list);
+static DEFINE_SPINLOCK(failover_lock);
+
+/* Find the failover netdev whose permanent address equals @mac.
+ *
+ * Walks failover_list under failover_lock. On a match the instance's ops
+ * pointer is stored in *@ops and the failover netdev is returned. When
+ * nothing matches, NULL is returned and *@ops is left untouched. Both
+ * pointers are read with rtnl_dereference().
+ */
+static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
+{
+       struct net_device *match = NULL;
+       struct failover *entry;
+
+       spin_lock(&failover_lock);
+       list_for_each_entry(entry, &failover_list, list) {
+               struct net_device *candidate;
+
+               candidate = rtnl_dereference(entry->failover_dev);
+               if (!ether_addr_equal(candidate->perm_addr, mac))
+                       continue;
+
+               *ops = rtnl_dereference(entry->ops);
+               match = candidate;
+               break;
+       }
+       spin_unlock(&failover_lock);
+
+       return match;
+}
+
+/**
+ * failover_slave_register - Register a slave netdev
+ *
+ * @slave_dev: slave netdev that is being registered
+ *
+ * Registers a slave device to a failover instance. Only ethernet devices
+ * are supported. The failover instance is found by matching the slave's
+ * permanent MAC address. Must be called with RTNL held.
+ *
+ * Return: NOTIFY_OK when a slave_register op exists and returns 0,
+ * NOTIFY_DONE in every other case (no matching instance, instance without
+ * ops, a handler refused, or a registration step failed).
+ */
+static int failover_slave_register(struct net_device *slave_dev)
+{
+       /* Designated initializer so that any field other than tx_type is
+        * zeroed rather than left as stack garbage.
+        */
+       struct netdev_lag_upper_info lag_upper_info = {
+               .tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP,
+       };
+       struct net_device *failover_dev;
+       struct failover_ops *fops;
+       int err;
+
+       if (slave_dev->type != ARPHRD_ETHER)
+               goto done;
+
+       ASSERT_RTNL();
+
+       failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+       if (!failover_dev)
+               goto done;
+
+       /* fops->slave_handle_frame is read below, so an instance that was
+        * registered without ops cannot take slaves; bail out instead of
+        * dereferencing a NULL pointer.
+        */
+       if (!fops)
+               goto done;
+
+       if (fops->slave_pre_register &&
+           fops->slave_pre_register(slave_dev, failover_dev))
+               goto done;
+
+       err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
+                                        failover_dev);
+       if (err) {
+               netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
+                          err);
+               goto done;
+       }
+
+       err = netdev_master_upper_dev_link_private(slave_dev, failover_dev,
+                                                  NULL, &lag_upper_info);
+       if (err) {
+               netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
+                          failover_dev->name, err);
+               goto err_upper_link;
+       }
+
+       slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+
+       if (fops->slave_register &&
+           !fops->slave_register(slave_dev, failover_dev))
+               return NOTIFY_OK;
+
+       /* No slave_register op, or it failed: undo the link and the flag. */
+       netdev_upper_dev_unlink(slave_dev, failover_dev);
+       slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+err_upper_link:
+       netdev_rx_handler_unregister(slave_dev);
+done:
+       return NOTIFY_DONE;
+}
+
+/**
+ * failover_slave_unregister - Unregister a slave netdev
+ *
+ * @slave_dev: slave netdev that is being unregistered
+ *
+ * Detaches a slave device from its failover instance: removes the rx
+ * handler and the upper link and clears IFF_FAILOVER_SLAVE. Devices that
+ * are not flagged IFF_FAILOVER_SLAVE are ignored.
+ *
+ * Return: NOTIFY_OK when a slave_unregister op exists and returns 0,
+ * NOTIFY_DONE otherwise.
+ */
+int failover_slave_unregister(struct net_device *slave_dev)
+{
+       struct net_device *failover_dev;
+       struct failover_ops *fops;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+       if (!failover_dev)
+               return NOTIFY_DONE;
+
+       /* A non-zero result from the pre-unregister op vetoes the teardown. */
+       if (fops && fops->slave_pre_unregister) {
+               if (fops->slave_pre_unregister(slave_dev, failover_dev))
+                       return NOTIFY_DONE;
+       }
+
+       netdev_rx_handler_unregister(slave_dev);
+       netdev_upper_dev_unlink(slave_dev, failover_dev);
+       slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+
+       if (!fops || !fops->slave_unregister)
+               return NOTIFY_DONE;
+
+       if (fops->slave_unregister(slave_dev, failover_dev))
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+EXPORT_SYMBOL_GPL(failover_slave_unregister);
+
+/* Forward a link event on a failover slave to the slave_link_change op.
+ *
+ * Nothing is done unless @slave_dev is flagged IFF_FAILOVER_SLAVE, a
+ * failover instance matches its permanent MAC and the failover netdev is
+ * running. Returns NOTIFY_OK when the op exists and returns 0, NOTIFY_DONE
+ * otherwise.
+ */
+static int failover_slave_link_change(struct net_device *slave_dev)
+{
+       struct net_device *failover_dev;
+       struct failover_ops *fops;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+       if (!failover_dev || !netif_running(failover_dev))
+               return NOTIFY_DONE;
+
+       if (!fops || !fops->slave_link_change)
+               return NOTIFY_DONE;
+
+       if (fops->slave_link_change(slave_dev, failover_dev))
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+
+/* Forward a rename event on a failover slave to the slave_name_change op.
+ *
+ * Nothing is done unless @slave_dev is flagged IFF_FAILOVER_SLAVE, a
+ * failover instance matches its permanent MAC and the failover netdev is
+ * running. Returns NOTIFY_OK when the op exists and returns 0, NOTIFY_DONE
+ * otherwise.
+ */
+static int failover_slave_name_change(struct net_device *slave_dev)
+{
+       struct net_device *failover_dev;
+       struct failover_ops *fops;
+
+       if (!netif_is_failover_slave(slave_dev))
+               return NOTIFY_DONE;
+
+       ASSERT_RTNL();
+
+       failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+       if (!failover_dev || !netif_running(failover_dev))
+               return NOTIFY_DONE;
+
+       if (!fops || !fops->slave_name_change)
+               return NOTIFY_DONE;
+
+       if (fops->slave_name_change(slave_dev, failover_dev))
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+
+/* Netdevice notifier callback: route slave events to the matching handler.
+ *
+ * Events raised on a failover (master) netdev itself are ignored, as are
+ * all event types not listed in the switch below.
+ */
+static int
+failover_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+       int ret = NOTIFY_DONE;
+
+       /* Skip parent events */
+       if (netif_is_failover(event_dev))
+               return ret;
+
+       switch (event) {
+       case NETDEV_REGISTER:
+               ret = failover_slave_register(event_dev);
+               break;
+       case NETDEV_UNREGISTER:
+               ret = failover_slave_unregister(event_dev);
+               break;
+       case NETDEV_UP:
+       case NETDEV_DOWN:
+       case NETDEV_CHANGE:
+               ret = failover_slave_link_change(event_dev);
+               break;
+       case NETDEV_CHANGENAME:
+               ret = failover_slave_name_change(event_dev);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/* Notifier block registered at module init; delivers netdevice events to
+ * failover_event().
+ */
+static struct notifier_block failover_notifier = {
+       .notifier_call = failover_event,
+};
+
+/* Attach already-present netdevs to a newly registered failover netdev.
+ *
+ * Under rtnl_lock, walks every netdev in @failover_dev's network namespace
+ * and calls failover_slave_register() on each non-failover device whose
+ * permanent MAC equals that of @failover_dev.
+ */
+static void
+failover_existing_slave_register(struct net_device *failover_dev)
+{
+       struct net *net = dev_net(failover_dev);
+       struct net_device *dev;
+
+       rtnl_lock();
+       for_each_netdev(net, dev) {
+               if (!netif_is_failover(dev) &&
+                   ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+                       failover_slave_register(dev);
+       }
+       rtnl_unlock();
+}
+
+/**
+ * failover_register - Register a failover instance
+ *
+ * @dev: failover netdev
+ * @ops: failover ops
+ *
+ * Allocates a failover instance for @dev, takes a reference on @dev, flags
+ * it IFF_FAILOVER and adds the instance to the global list. @ops supplies
+ * the handlers for slave register/unregister/link change/name change
+ * events. Netdevs that already exist with the same permanent MAC are then
+ * offered for slave registration.
+ *
+ * Return: pointer to the new failover instance, ERR_PTR(-EINVAL) if @dev
+ * is not an ARPHRD_ETHER device, or ERR_PTR(-ENOMEM) if allocation fails.
+ */
+struct failover *failover_register(struct net_device *dev,
+                                  struct failover_ops *ops)
+{
+       struct failover *instance;
+
+       if (dev->type != ARPHRD_ETHER)
+               return ERR_PTR(-EINVAL);
+
+       instance = kzalloc(sizeof(*instance), GFP_KERNEL);
+       if (!instance)
+               return ERR_PTR(-ENOMEM);
+
+       rcu_assign_pointer(instance->ops, ops);
+       dev_hold(dev);
+       dev->priv_flags |= IFF_FAILOVER;
+       rcu_assign_pointer(instance->failover_dev, dev);
+
+       spin_lock(&failover_lock);
+       list_add_tail(&instance->list, &failover_list);
+       spin_unlock(&failover_lock);
+
+       netdev_info(dev, "failover master:%s registered\n", dev->name);
+
+       failover_existing_slave_register(dev);
+
+       return instance;
+}
+EXPORT_SYMBOL_GPL(failover_register);
+
+/**
+ * failover_unregister - Unregister a failover instance
+ *
+ * @failover: pointer to failover instance
+ *
+ * Unregisters and frees a failover instance: clears IFF_FAILOVER on the
+ * failover netdev, drops the netdev reference, removes the instance from
+ * the global list and frees it. This function does not touch any slave
+ * device. @failover must not be used after this call returns.
+ */
+void failover_unregister(struct failover *failover)
+{
+       struct net_device *failover_dev;
+
+       /* NOTE(review): rcu_dereference() is used with no rcu_read_lock()
+        * visible in this function - confirm the locking context callers
+        * provide.
+        */
+       failover_dev = rcu_dereference(failover->failover_dev);
+
+       netdev_info(failover_dev, "failover master:%s unregistered\n",
+                   failover_dev->name);
+
+       failover_dev->priv_flags &= ~IFF_FAILOVER;
+       /* Pairs with the dev_hold() in failover_register(). */
+       dev_put(failover_dev);
+
+       spin_lock(&failover_lock);
+       list_del(&failover->list);
+       spin_unlock(&failover_lock);
+
+       kfree(failover);
+}
+EXPORT_SYMBOL_GPL(failover_unregister);
+
+/* Module init: subscribe to netdevice notifications.
+ *
+ * Returns the result of register_netdevice_notifier() so that a failed
+ * registration fails the module load instead of leaving a loaded module
+ * that never receives any event.
+ */
+static __init int
+failover_init(void)
+{
+       return register_netdevice_notifier(&failover_notifier);
+}
+module_init(failover_init);
+
+/* Module exit: stop receiving netdevice notifications. */
+static void __exit failover_exit(void)
+{
+       unregister_netdevice_notifier(&failover_notifier);
+}
+module_exit(failover_exit);
+
+MODULE_DESCRIPTION("Generic failover infrastructure/interface");
+MODULE_LICENSE("GPL v2");