]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: ActiveBonding: Create a cluster sync point for failback
authorSantosh Shilimkar <santosh.shilimkar@oracle.com>
Tue, 8 Nov 2016 01:22:51 +0000 (17:22 -0800)
committerChuck Anderson <chuck.anderson@oracle.com>
Fri, 17 Feb 2017 04:29:20 +0000 (20:29 -0800)
On hardware port link-up, the multi-cast join at times fails,
which delays the IP layer in bringing up the interface.
A subsequent multi-cast retry might succeed, and then the IP
layer will be ready for IP migration. This happens very
sporadically on bare metal systems but more often on VM systems,
and the number of multi-cast queries also goes up with the number of VMs.

This creates a load of RC connection thrashing across the cluster
since the IP migration gets staggered, which is not ideal for
active-active. So we create a sync point so that the entire cluster
gets synced up. This helps to reduce the thrashing and premature
failover attempts. Obviously it's only applicable for failback.

A user sysctl is provided "active_bonding_failback_ms"
in case there is a need to tune the sync point.

Orabug: 25026643

Tested-by: Michael Nowak <michael.nowak@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
Reviewed-by: Avinash Repaka <avinash.repaka@oracle.com>
Reviewed-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/ib.c
net/rds/ib.h
net/rds/ib_sysctl.c

index a5890fbf1c97fcb5554bd3b1f883c33a68fca3e1..2ff8779a08a2e7abac5342642f491fd88a8194bc 100644 (file)
@@ -147,6 +147,16 @@ rds_ibp_all_layers_up(struct rds_ib_port *rds_ibp)
        return 0;
 }
 
+/*
+ * Return how many jiffies a failback for this port should still be delayed
+ * so that it runs no sooner than the configured sync window
+ * (rds_ib_sysctl_active_bonding_failback_jiffies) after the time recorded
+ * in rds_ibp->port_active_ts. Returns 0 once that window has elapsed.
+ */
+static unsigned long get_failback_sync_jiffies(struct rds_ib_port *rds_ibp)
+{
+       /*
+        * Read the tunable exactly once: it can be rewritten through sysctl
+        * at any time, and re-reading it between the check and the
+        * subtraction below could make the subtraction wrap around and
+        * yield an enormous delay.
+        */
+       unsigned long window =
+               READ_ONCE(rds_ib_sysctl_active_bonding_failback_jiffies);
+       unsigned long elapsed = get_jiffies_64() - rds_ibp->port_active_ts;
+
+       if (elapsed > window)
+               return 0;
+
+       return window - elapsed;
+}
+
 void rds_ib_nodev_connect(void)
 {
        struct rds_ib_connection *ic;
@@ -1238,6 +1248,7 @@ static void rds_ib_event_handler(struct ib_event_handler *handler,
                if (event->event == IB_EVENT_PORT_ACTIVE) {
                        ip_config[port].port_layerflags |=
                          RDSIBP_STATUS_HWPORTUP;
+                       ip_config[port].port_active_ts = get_jiffies_64();
                } else {
                        /* event->event == IB_EVENT_PORT_ERROR */
                        ip_config[port].port_layerflags &=
@@ -1392,7 +1403,8 @@ static void rds_ib_event_handler(struct ib_event_handler *handler,
                                rds_rtd(RDS_RTD_ACT_BND,
                                        "active bonding fallback enabled\n");
                                INIT_DELAYED_WORK(&work->work, rds_ib_failback);
-                               queue_delayed_work(rds_wq, &work->work, 0);
+                               queue_delayed_work(rds_wq, &work->work,
+                                       get_failback_sync_jiffies(&ip_config[port]));
                        } else
                                kfree(work);
                } else {
@@ -2444,7 +2456,9 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve
                        rds_rtd(RDS_RTD_ACT_BND,
                                "active bonding fallback enabled\n");
                        INIT_DELAYED_WORK(&work->work, rds_ib_failback);
-                       queue_delayed_work(rds_wq, &work->work, 0);
+                       queue_delayed_work(rds_wq, &work->work,
+                                       get_failback_sync_jiffies(&ip_config[port]));
+                       ip_config[port].port_active_ts = 0;
                } else
                        kfree(work);
                break;
index aabdcbc9019f04a21c8bfa453c429c446ae6109c..8fd3e23369adeb5e0f27cb5349f3972b8a4c3a1a 100644 (file)
@@ -360,6 +360,7 @@ struct rds_ib_port {
        uint16_t                pkey;
        unsigned int            alias_cnt;
        struct rds_ib_alias     aliases[RDS_IB_MAX_ALIASES];
+       unsigned long           port_active_ts;
 };
 
 enum {
@@ -665,5 +666,8 @@ extern unsigned int rds_ib_sysctl_flow_control;
 extern unsigned int rds_ib_sysctl_active_bonding;
 extern unsigned int rds_ib_sysctl_trigger_active_bonding;
 extern unsigned int rds_ib_sysctl_disable_unmap_fmr_cpu;
+extern unsigned long rds_ib_active_bonding_failback_min_jiffies;
+extern unsigned long rds_ib_active_bonding_failback_max_jiffies;
+extern unsigned long rds_ib_sysctl_active_bonding_failback_jiffies;
 
 #endif
index f6dfb230d5fb62b235ed9e808c077f70ebe6bb17..5515ee743acf71ad4749ce73a5baa0fa7ba6c501 100644 (file)
@@ -74,6 +74,10 @@ unsigned int rds_ib_sysctl_disable_unmap_fmr_cpu; /* = 0 */
  */
 unsigned int rds_ib_sysctl_trigger_active_bonding; /* = 0 */
 
+unsigned long rds_ib_active_bonding_failback_min_jiffies = HZ; /* extra1 (min) for active_bonding_failback_ms */
+unsigned long rds_ib_active_bonding_failback_max_jiffies = HZ * 100; /* extra2 (max) for active_bonding_failback_ms */
+unsigned long rds_ib_sysctl_active_bonding_failback_jiffies = HZ * 10; /* failback sync window, in jiffies */
+
 static struct ctl_table rds_ib_sysctl_table[] = {
        {
                .procname       = "max_send_wr",
@@ -130,6 +134,15 @@ static struct ctl_table rds_ib_sysctl_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .procname       = "active_bonding_failback_ms",
+               .data           = &rds_ib_sysctl_active_bonding_failback_jiffies,
+               .maxlen         = sizeof(rds_ib_sysctl_active_bonding_failback_jiffies),
+               .mode           = 0644,
+               .proc_handler   = proc_doulongvec_ms_jiffies_minmax,
+               .extra1         = &rds_ib_active_bonding_failback_min_jiffies,
+               .extra2         = &rds_ib_active_bonding_failback_max_jiffies,
+       },
        {
                .procname       = "disable_unmap_fmr_cpu_assignment",
                .data           = &rds_ib_sysctl_disable_unmap_fmr_cpu,