--- /dev/null
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below.  You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      1. Redistributions of source code must retain the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer.
+ *
+ *      2. Redistributions in binary form must reproduce the above
+ *         copyright notice, this list of conditions and the following
+ *         disclaimer in the documentation and/or other materials
+ *         provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "main.h"
+
+/* LAG group config flags. */
+#define NFP_FL_LAG_LAST                        BIT(1)
+#define NFP_FL_LAG_FIRST               BIT(2)
+#define NFP_FL_LAG_SWITCH              BIT(6)
+#define NFP_FL_LAG_RESET               BIT(7)
+
+/* LAG port state flags. */
+#define NFP_PORT_LAG_LINK_UP           BIT(0)
+#define NFP_PORT_LAG_TX_ENABLED                BIT(1)
+#define NFP_PORT_LAG_CHANGED           BIT(2)
+
+enum nfp_fl_lag_batch {
+       NFP_FL_LAG_BATCH_FIRST,
+       NFP_FL_LAG_BATCH_MEMBER,
+       NFP_FL_LAG_BATCH_FINISHED
+};
+
+/**
+ * struct nfp_flower_cmsg_lag_config - control message payload for LAG config
+ * @ctrl_flags:        Configuration flags
+ * @reserved:  Reserved for future use
+ * @ttl:       Time to live of packet - host always sets to 0xff
+ * @pkt_number:        Config message packet number - increment for each message
+ * @batch_ver: Batch version of messages - increment for each batch of messages
+ * @group_id:  Group ID applicable
+ * @group_inst:        Group instance number - increment when group is reused
+ * @members:   Array of 32-bit words listing all active group members
+ */
+struct nfp_flower_cmsg_lag_config {
+       u8 ctrl_flags;
+       u8 reserved[2];
+       u8 ttl;
+       __be32 pkt_number;
+       __be32 batch_ver;
+       __be32 group_id;
+       __be32 group_inst;
+       __be32 members[];
+};
+
+/**
+ * struct nfp_fl_lag_group - list entry for each LAG group
+ * @group_id:          Assigned group ID for host/kernel sync
+ * @group_inst:                Group instance in case of ID reuse
+ * @list:              List entry
+ * @master_ndev:       Group master Netdev
+ * @dirty:             Marked if the group needs to be synced to HW
+ * @offloaded:         Marked if the group is currently offloaded to NIC
+ * @to_remove:         Marked if the group should be removed from NIC
+ * @to_destroy:                Marked if the group should be removed from driver
+ * @slave_cnt:         Number of slaves in group
+ */
+struct nfp_fl_lag_group {
+       unsigned int group_id;
+       u8 group_inst;
+       struct list_head list;
+       struct net_device *master_ndev;
+       bool dirty;
+       bool offloaded;
+       bool to_remove;
+       bool to_destroy;
+       unsigned int slave_cnt;
+};
+
+#define NFP_FL_LAG_PKT_NUMBER_MASK     GENMASK(30, 0)
+#define NFP_FL_LAG_VERSION_MASK                GENMASK(22, 0)
+#define NFP_FL_LAG_HOST_TTL            0xff
+
+/* Use this ID with zero members to ack a batch config */
+#define NFP_FL_LAG_SYNC_ID             0
+#define NFP_FL_LAG_GROUP_MIN           1 /* ID 0 reserved */
+#define NFP_FL_LAG_GROUP_MAX           32 /* IDs 1 to 31 are valid */
+
+/* wait for more config */
+#define NFP_FL_LAG_DELAY               (msecs_to_jiffies(2))
+
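+/* Return the next config message packet number, wrapping within the
+ * 31-bit packet number mask.
+ */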
+static unsigned int nfp_fl_get_next_pkt_number(struct nfp_fl_lag *lag)
+{
+       lag->pkt_num++;
+       lag->pkt_num &= NFP_FL_LAG_PKT_NUMBER_MASK;
+
+       return lag->pkt_num;
+}
+
+static void nfp_fl_increment_version(struct nfp_fl_lag *lag)
+{
+       /* LSB is not considered by firmware so add 2 for each increment. */
+       lag->batch_ver += 2;
+       lag->batch_ver &= NFP_FL_LAG_VERSION_MASK;
+
+       /* Zero is reserved by firmware. */
+       if (!lag->batch_ver)
+               lag->batch_ver += 2;
+}
+
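+/* Allocate a group ID and a new group entry for @master. The group is
+ * added to the LAG group list marked dirty so the next worker pass will
+ * sync it to firmware. Returns the new group or an ERR_PTR on failure.
+ */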
+static struct nfp_fl_lag_group *
+nfp_fl_lag_group_create(struct nfp_fl_lag *lag, struct net_device *master)
+{
+       struct nfp_fl_lag_group *group;
+       struct nfp_flower_priv *priv;
+       int id;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+       id = ida_simple_get(&lag->ida_handle, NFP_FL_LAG_GROUP_MIN,
+                           NFP_FL_LAG_GROUP_MAX, GFP_KERNEL);
+       if (id < 0) {
+               nfp_flower_cmsg_warn(priv->app,
+                                    "No more bonding groups available\n");
+               return ERR_PTR(id);
+       }
+
+       group = kmalloc(sizeof(*group), GFP_KERNEL);
+       if (!group) {
+               ida_simple_remove(&lag->ida_handle, id);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       group->group_id = id;
+       group->master_ndev = master;
+       group->dirty = true;
+       group->offloaded = false;
+       group->to_remove = false;
+       group->to_destroy = false;
+       group->slave_cnt = 0;
+       group->group_inst = ++lag->global_inst;
+       list_add_tail(&group->list, &lag->group_list);
+
+       return group;
+}
+
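+/* Find the group entry whose master netdev is @master, or NULL if none
+ * exists. Must be called with the LAG mutex held.
+ */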
+static struct nfp_fl_lag_group *
+nfp_fl_lag_find_group_for_master_with_lag(struct nfp_fl_lag *lag,
+                                         struct net_device *master)
+{
+       struct nfp_fl_lag_group *entry;
+
+       if (!master)
+               return NULL;
+
+       list_for_each_entry(entry, &lag->group_list, list)
+               if (entry->master_ndev == master)
+                       return entry;
+
+       return NULL;
+}
+
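+/* Build and send one LAG config control message for @group with
+ * @member_cnt active members. Batch state is tracked via @batch; the
+ * final message of a batch carries the reserved SYNC group ID.
+ */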
+static int
+nfp_fl_lag_config_group(struct nfp_fl_lag *lag, struct nfp_fl_lag_group *group,
+                       struct net_device **active_members,
+                       unsigned int member_cnt, enum nfp_fl_lag_batch *batch)
+{
+       struct nfp_flower_cmsg_lag_config *cmsg_payload;
+       struct nfp_flower_priv *priv;
+       unsigned long flags;
+       unsigned int size, i;
+       struct sk_buff *skb;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+       size = sizeof(*cmsg_payload) + sizeof(__be32) * member_cnt;
+       skb = nfp_flower_cmsg_alloc(priv->app, size,
+                                   NFP_FLOWER_CMSG_TYPE_LAG_CONFIG,
+                                   GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       cmsg_payload = nfp_flower_cmsg_get_data(skb);
+       flags = 0;
+
+       /* Increment batch version for each new batch of config messages. */
+       if (*batch == NFP_FL_LAG_BATCH_FIRST) {
+               flags |= NFP_FL_LAG_FIRST;
+               nfp_fl_increment_version(lag);
+               *batch = NFP_FL_LAG_BATCH_MEMBER;
+       }
+
+       /* If it is a reset msg then it is also the end of the batch. */
+       if (lag->rst_cfg) {
+               flags |= NFP_FL_LAG_RESET;
+               *batch = NFP_FL_LAG_BATCH_FINISHED;
+       }
+
+       /* To signal the end of a batch, both the switch and last flags are set
+        * and the reserved SYNC group ID is used.
+        */
+       if (*batch == NFP_FL_LAG_BATCH_FINISHED) {
+               flags |= NFP_FL_LAG_SWITCH | NFP_FL_LAG_LAST;
+               lag->rst_cfg = false;
+               cmsg_payload->group_id = cpu_to_be32(NFP_FL_LAG_SYNC_ID);
+               cmsg_payload->group_inst = 0;
+       } else {
+               cmsg_payload->group_id = cpu_to_be32(group->group_id);
+               cmsg_payload->group_inst = cpu_to_be32(group->group_inst);
+       }
+
+       cmsg_payload->reserved[0] = 0;
+       cmsg_payload->reserved[1] = 0;
+       cmsg_payload->ttl = NFP_FL_LAG_HOST_TTL;
+       cmsg_payload->ctrl_flags = flags;
+       cmsg_payload->batch_ver = cpu_to_be32(lag->batch_ver);
+       cmsg_payload->pkt_number = cpu_to_be32(nfp_fl_get_next_pkt_number(lag));
+
+       for (i = 0; i < member_cnt; i++)
+               cmsg_payload->members[i] =
+                       cpu_to_be32(nfp_repr_get_port_id(active_members[i]));
+
+       nfp_ctrl_tx(priv->app->ctrl, skb);
+       return 0;
+}
+
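+/* Delayed work handler that syncs the LAG group list to firmware. Groups
+ * marked for removal are deleted on hardware (and freed if also marked
+ * for destruction), while dirty groups are re-offloaded with their
+ * current set of active members. Failures are retried by rescheduling.
+ */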
+static void nfp_fl_lag_do_work(struct work_struct *work)
+{
+       enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+       struct nfp_fl_lag_group *entry, *storage;
+       struct delayed_work *delayed_work;
+       struct nfp_flower_priv *priv;
+       struct nfp_fl_lag *lag;
+       int err;
+
+       delayed_work = to_delayed_work(work);
+       lag = container_of(delayed_work, struct nfp_fl_lag, work);
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+       mutex_lock(&lag->lock);
+       list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
+               struct net_device *iter_netdev, **acti_netdevs;
+               struct nfp_flower_repr_priv *repr_priv;
+               int active_count = 0, slaves = 0;
+               struct nfp_repr *repr;
+               unsigned long *flags;
+
+               if (entry->to_remove) {
+                       /* Active count of 0 deletes group on hw. */
+                       err = nfp_fl_lag_config_group(lag, entry, NULL, 0,
+                                                     &batch);
+                       if (!err) {
+                               entry->to_remove = false;
+                               entry->offloaded = false;
+                       } else {
+                               nfp_flower_cmsg_warn(priv->app,
+                                                    "group delete failed\n");
+                               schedule_delayed_work(&lag->work,
+                                                     NFP_FL_LAG_DELAY);
+                               continue;
+                       }
+
+                       if (entry->to_destroy) {
+                               ida_simple_remove(&lag->ida_handle,
+                                                 entry->group_id);
+                               list_del(&entry->list);
+                               kfree(entry);
+                       }
+                       continue;
+               }
+
+               acti_netdevs = kmalloc_array(entry->slave_cnt,
+                                            sizeof(*acti_netdevs), GFP_KERNEL);
+               if (!acti_netdevs) {
+                       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+                       continue;
+               }
+
+               /* Include sanity check in the loop. It may be that a bond has
+                * changed between processing the last notification and the
+                * work queue triggering. If the number of slaves has changed
+                * or it now contains netdevs that cannot be offloaded, ignore
+                * the group until pending notifications are processed.
+                */
+               rcu_read_lock();
+               for_each_netdev_in_bond_rcu(entry->master_ndev, iter_netdev) {
+                       if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+                               slaves = 0;
+                               break;
+                       }
+
+                       repr = netdev_priv(iter_netdev);
+
+                       if (repr->app != priv->app) {
+                               slaves = 0;
+                               break;
+                       }
+
+                       slaves++;
+                       if (slaves > entry->slave_cnt)
+                               break;
+
+                       /* Check the ports for state changes. */
+                       repr_priv = repr->app_priv;
+                       flags = &repr_priv->lag_port_flags;
+
+                       if (*flags & NFP_PORT_LAG_CHANGED) {
+                               *flags &= ~NFP_PORT_LAG_CHANGED;
+                               entry->dirty = true;
+                       }
+
+                       if ((*flags & NFP_PORT_LAG_TX_ENABLED) &&
+                           (*flags & NFP_PORT_LAG_LINK_UP))
+                               acti_netdevs[active_count++] = iter_netdev;
+               }
+               rcu_read_unlock();
+
+               if (slaves != entry->slave_cnt || !entry->dirty) {
+                       kfree(acti_netdevs);
+                       continue;
+               }
+
+               err = nfp_fl_lag_config_group(lag, entry, acti_netdevs,
+                                             active_count, &batch);
+               if (!err) {
+                       entry->offloaded = true;
+                       entry->dirty = false;
+               } else {
+                       nfp_flower_cmsg_warn(priv->app,
+                                            "group offload failed\n");
+                       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+               }
+
+               kfree(acti_netdevs);
+       }
+
+       /* End the config batch if at least one packet has been batched. */
+       if (batch == NFP_FL_LAG_BATCH_MEMBER) {
+               batch = NFP_FL_LAG_BATCH_FINISHED;
+               err = nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+               if (err)
+                       nfp_flower_cmsg_warn(priv->app,
+                                            "group batch end cmsg failed\n");
+       }
+
+       mutex_unlock(&lag->lock);
+}
+
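+/* Mark @group for removal from hardware and kick the worker. */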
+static void
+nfp_fl_lag_schedule_group_remove(struct nfp_fl_lag *lag,
+                                struct nfp_fl_lag_group *group)
+{
+       group->to_remove = true;
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+}
+
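+/* Schedule removal and destruction of the group owned by @master, e.g.
+ * when the bond master is unregistered.
+ */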
+static int
+nfp_fl_lag_schedule_group_delete(struct nfp_fl_lag *lag,
+                                struct net_device *master)
+{
+       struct nfp_fl_lag_group *group;
+
+       mutex_lock(&lag->lock);
+       group = nfp_fl_lag_find_group_for_master_with_lag(lag, master);
+       if (!group) {
+               mutex_unlock(&lag->lock);
+               return -ENOENT;
+       }
+
+       group->to_remove = true;
+       group->to_destroy = true;
+       mutex_unlock(&lag->lock);
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
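+/* Handle a CHANGEUPPER event on a bond. Count the slaves, check that all
+ * of them are reprs belonging to this app and that the bond's tx_type and
+ * hash type can be offloaded, then create or update the group accordingly.
+ * Groups that can no longer be offloaded are scheduled for removal.
+ */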
+static int
+nfp_fl_lag_changeupper_event(struct nfp_fl_lag *lag,
+                            struct netdev_notifier_changeupper_info *info)
+{
+       struct net_device *upper = info->upper_dev, *iter_netdev;
+       struct netdev_lag_upper_info *lag_upper_info;
+       struct nfp_fl_lag_group *group;
+       struct nfp_flower_priv *priv;
+       unsigned int slave_count = 0;
+       bool can_offload = true;
+       struct nfp_repr *repr;
+
+       if (!netif_is_lag_master(upper))
+               return 0;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+
+       rcu_read_lock();
+       for_each_netdev_in_bond_rcu(upper, iter_netdev) {
+               if (!nfp_netdev_is_nfp_repr(iter_netdev)) {
+                       can_offload = false;
+                       break;
+               }
+               repr = netdev_priv(iter_netdev);
+
+               /* Ensure all ports are created by the same app/on same card. */
+               if (repr->app != priv->app) {
+                       can_offload = false;
+                       break;
+               }
+
+               slave_count++;
+       }
+       rcu_read_unlock();
+
+       lag_upper_info = info->upper_info;
+
+       /* Firmware supports active/backup and L3/L4 hash bonds. */
+       if (lag_upper_info &&
+           lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
+           (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH ||
+           (lag_upper_info->hash_type != NETDEV_LAG_HASH_L34 &&
+           lag_upper_info->hash_type != NETDEV_LAG_HASH_E34))) {
+               can_offload = false;
+               nfp_flower_cmsg_warn(priv->app,
+                                    "Unable to offload tx_type %u hash %u\n",
+                                    lag_upper_info->tx_type,
+                                    lag_upper_info->hash_type);
+       }
+
+       mutex_lock(&lag->lock);
+       group = nfp_fl_lag_find_group_for_master_with_lag(lag, upper);
+
+       if (slave_count == 0 || !can_offload) {
+               /* Cannot offload the group - remove if previously offloaded. */
+               if (group && group->offloaded)
+                       nfp_fl_lag_schedule_group_remove(lag, group);
+
+               mutex_unlock(&lag->lock);
+               return 0;
+       }
+
+       if (!group) {
+               group = nfp_fl_lag_group_create(lag, upper);
+               if (IS_ERR(group)) {
+                       mutex_unlock(&lag->lock);
+                       return PTR_ERR(group);
+               }
+       }
+
+       group->dirty = true;
+       group->slave_cnt = slave_count;
+
+       /* Group may have been on queue for removal but is now offloadable. */
+       group->to_remove = false;
+       mutex_unlock(&lag->lock);
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
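+/* Handle a CHANGELOWERSTATE event on a LAG port. Record the link and
+ * tx_enabled state in the repr's LAG port flags and schedule the worker
+ * to push any resulting changes to firmware.
+ */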
+static int
+nfp_fl_lag_changels_event(struct nfp_fl_lag *lag, struct net_device *netdev,
+                         struct netdev_notifier_changelowerstate_info *info)
+{
+       struct netdev_lag_lower_state_info *lag_lower_info;
+       struct nfp_flower_repr_priv *repr_priv;
+       struct nfp_flower_priv *priv;
+       struct nfp_repr *repr;
+       unsigned long *flags;
+
+       if (!netif_is_lag_port(netdev) || !nfp_netdev_is_nfp_repr(netdev))
+               return 0;
+
+       lag_lower_info = info->lower_state_info;
+       if (!lag_lower_info)
+               return 0;
+
+       priv = container_of(lag, struct nfp_flower_priv, nfp_lag);
+       repr = netdev_priv(netdev);
+
+       /* Verify that the repr is associated with this app. */
+       if (repr->app != priv->app)
+               return 0;
+
+       repr_priv = repr->app_priv;
+       flags = &repr_priv->lag_port_flags;
+
+       mutex_lock(&lag->lock);
+       if (lag_lower_info->link_up)
+               *flags |= NFP_PORT_LAG_LINK_UP;
+       else
+               *flags &= ~NFP_PORT_LAG_LINK_UP;
+
+       if (lag_lower_info->tx_enabled)
+               *flags |= NFP_PORT_LAG_TX_ENABLED;
+       else
+               *flags &= ~NFP_PORT_LAG_TX_ENABLED;
+
+       *flags |= NFP_PORT_LAG_CHANGED;
+       mutex_unlock(&lag->lock);
+
+       schedule_delayed_work(&lag->work, NFP_FL_LAG_DELAY);
+       return 0;
+}
+
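+/* Netdev notifier callback - dispatch bond related events to the LAG
+ * handlers above.
+ */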
+static int
+nfp_fl_lag_netdev_event(struct notifier_block *nb, unsigned long event,
+                       void *ptr)
+{
+       struct net_device *netdev;
+       struct nfp_fl_lag *lag;
+       int err;
+
+       netdev = netdev_notifier_info_to_dev(ptr);
+       lag = container_of(nb, struct nfp_fl_lag, lag_nb);
+
+       switch (event) {
+       case NETDEV_CHANGEUPPER:
+               err = nfp_fl_lag_changeupper_event(lag, ptr);
+               if (err)
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       case NETDEV_CHANGELOWERSTATE:
+               err = nfp_fl_lag_changels_event(lag, netdev, ptr);
+               if (err)
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       case NETDEV_UNREGISTER:
+               if (netif_is_bond_master(netdev)) {
+                       err = nfp_fl_lag_schedule_group_delete(lag, netdev);
+                       if (err)
+                               return NOTIFY_BAD;
+                       return NOTIFY_OK;
+               }
+       }
+
+       return NOTIFY_DONE;
+}
+
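+/* Request a reset of all LAG config on firmware. The reset flag makes the
+ * next (single message) batch carry NFP_FL_LAG_RESET.
+ */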
+int nfp_flower_lag_reset(struct nfp_fl_lag *lag)
+{
+       enum nfp_fl_lag_batch batch = NFP_FL_LAG_BATCH_FIRST;
+
+       lag->rst_cfg = true;
+       return nfp_fl_lag_config_group(lag, NULL, NULL, 0, &batch);
+}
+
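+/* Initialise LAG state: worker, group list, lock, ID allocator and the
+ * netdev notifier callback (registering the notifier is left to the
+ * caller).
+ */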
+void nfp_flower_lag_init(struct nfp_fl_lag *lag)
+{
+       INIT_DELAYED_WORK(&lag->work, nfp_fl_lag_do_work);
+       INIT_LIST_HEAD(&lag->group_list);
+       mutex_init(&lag->lock);
+       ida_init(&lag->ida_handle);
+
+       /* 0 is a reserved batch version so increment to first valid value. */
+       nfp_fl_increment_version(lag);
+
+       lag->lag_nb.notifier_call = nfp_fl_lag_netdev_event;
+}
+
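+/* Stop the worker and free all remaining group entries and the group ID
+ * allocator.
+ */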
+void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag)
+{
+       struct nfp_fl_lag_group *entry, *storage;
+
+       cancel_delayed_work_sync(&lag->work);
+
+       /* Remove all groups. */
+       mutex_lock(&lag->lock);
+       list_for_each_entry_safe(entry, storage, &lag->group_list, list) {
+               list_del(&entry->list);
+               kfree(entry);
+       }
+       mutex_unlock(&lag->lock);
+       mutex_destroy(&lag->lock);
+       ida_destroy(&lag->ida_handle);
+}