From 1af5fdbb68cfe36f3fe5ceb567d05ea38d1b7656 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Mon, 7 Nov 2016 17:22:51 -0800 Subject: [PATCH] RDS: ActiveBonding: Create a cluster sync point for failback On hardware port link-up, the multicast join sometimes fails, which delays the IP layer from bringing up the interface quickly. A subsequent multicast retry might succeed, and only then is the IP layer ready for IP migration. This happens very sporadically on bare-metal systems but more often on VM systems, and the number of multicast queries also goes up with the number of VMs. This creates a lot of RC connection thrashing across the cluster, since the IP migration gets staggered, which is not ideal for active-active. So we create a sync point so that the entire cluster gets synced up. This helps to reduce the thrashing and premature failover attempts. Obviously it is only applicable for failback. A user sysctl, "active_bonding_failback_ms", is provided in case there is a need to tune the sync point. Orabug: 25026643 Tested-by: Michael Nowak Tested-by: Dib Chatterjee Reviewed-by: Avinash Repaka Reviewed-by: Mukesh Kacker Signed-off-by: Santosh Shilimkar --- net/rds/ib.c | 18 ++++++++++++++++-- net/rds/ib.h | 4 ++++ net/rds/ib_sysctl.c | 13 +++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/net/rds/ib.c b/net/rds/ib.c index a5890fbf1c97..2ff8779a08a2 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -147,6 +147,16 @@ rds_ibp_all_layers_up(struct rds_ib_port *rds_ibp) return 0; } +static unsigned long get_failback_sync_jiffies(struct rds_ib_port *rds_ibp) +{ + unsigned long t = get_jiffies_64() - rds_ibp->port_active_ts; + + if (t > rds_ib_sysctl_active_bonding_failback_jiffies) + return 0; + + return rds_ib_sysctl_active_bonding_failback_jiffies - t; +} + void rds_ib_nodev_connect(void) { struct rds_ib_connection *ic; @@ -1238,6 +1248,7 @@ static void rds_ib_event_handler(struct ib_event_handler *handler, if (event->event == IB_EVENT_PORT_ACTIVE) { 
ip_config[port].port_layerflags |= RDSIBP_STATUS_HWPORTUP; + ip_config[port].port_active_ts = get_jiffies_64(); /* timestamp the PORT_ACTIVE event; get_failback_sync_jiffies() measures elapsed time from it */ } else { /* event->event == IB_EVENT_PORT_ERROR */ ip_config[port].port_layerflags &= @@ -1392,7 +1403,8 @@ static void rds_ib_event_handler(struct ib_event_handler *handler, rds_rtd(RDS_RTD_ACT_BND, "active bonding fallback enabled\n"); INIT_DELAYED_WORK(&work->work, rds_ib_failback); - queue_delayed_work(rds_wq, &work->work, 0); + queue_delayed_work(rds_wq, &work->work, + get_failback_sync_jiffies(&ip_config[port])); /* delay by whatever is left of the failback sync window instead of running immediately */ } else kfree(work); } else { @@ -2444,7 +2456,9 @@ static int rds_ib_netdev_callback(struct notifier_block *self, unsigned long eve rds_rtd(RDS_RTD_ACT_BND, "active bonding fallback enabled\n"); INIT_DELAYED_WORK(&work->work, rds_ib_failback); - queue_delayed_work(rds_wq, &work->work, 0); + queue_delayed_work(rds_wq, &work->work, + get_failback_sync_jiffies(&ip_config[port])); /* same sync-window delay as in rds_ib_event_handler() */ + ip_config[port].port_active_ts = 0; /* timestamp consumed; NOTE(review): with 0 here the helper presumably computes a very large elapsed time and returns no delay until the next PORT_ACTIVE event, confirm */ } else kfree(work); break; diff --git a/net/rds/ib.h b/net/rds/ib.h index aabdcbc9019f..8fd3e23369ad 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -360,6 +360,7 @@ struct rds_ib_port { uint16_t pkey; unsigned int alias_cnt; struct rds_ib_alias aliases[RDS_IB_MAX_ALIASES]; + unsigned long port_active_ts; /* get_jiffies_64() snapshot taken on IB_EVENT_PORT_ACTIVE, reset to 0 once rds_ib_netdev_callback() queues the failback; NOTE(review): 64-bit source stored in unsigned long, confirm 32-bit behavior */ }; enum { @@ -665,5 +666,8 @@ extern unsigned int rds_ib_sysctl_flow_control; extern unsigned int rds_ib_sysctl_active_bonding; extern unsigned int rds_ib_sysctl_trigger_active_bonding; extern unsigned int rds_ib_sysctl_disable_unmap_fmr_cpu; +extern unsigned long rds_ib_active_bonding_failback_min_jiffies; +extern unsigned long rds_ib_active_bonding_failback_max_jiffies; +extern unsigned long rds_ib_sysctl_active_bonding_failback_jiffies; /* min, max and current failback sync delay, all in jiffies; defined in ib_sysctl.c */ #endif diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c index f6dfb230d5fb..5515ee743acf 100644 --- a/net/rds/ib_sysctl.c +++ b/net/rds/ib_sysctl.c @@ -74,6 +74,10 @@ unsigned int rds_ib_sysctl_disable_unmap_fmr_cpu; /* = 0 */ */ unsigned int rds_ib_sysctl_trigger_active_bonding; /* = 0 
*/ +unsigned long rds_ib_active_bonding_failback_min_jiffies = HZ; /* smallest accepted failback delay: HZ jiffies, i.e. 1 second */ +unsigned long rds_ib_active_bonding_failback_max_jiffies = HZ * 100; /* largest accepted failback delay: 100 seconds */ +unsigned long rds_ib_sysctl_active_bonding_failback_jiffies = HZ * 10; /* default failback delay: 10 seconds */ + static struct ctl_table rds_ib_sysctl_table[] = { { .procname = "max_send_wr", @@ -130,6 +134,15 @@ static struct ctl_table rds_ib_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .procname = "active_bonding_failback_ms", /* user-visible unit is milliseconds */ + .data = &rds_ib_sysctl_active_bonding_failback_jiffies, /* backing variable is kept in jiffies */ + .maxlen = sizeof(rds_ib_sysctl_active_bonding_failback_jiffies), + .mode = 0644, + .proc_handler = proc_doulongvec_ms_jiffies_minmax, /* handler name indicates ms to jiffies conversion with a range check; TODO confirm against kernel sysctl docs */ + .extra1 = &rds_ib_active_bonding_failback_min_jiffies, /* lower bound for the range check */ + .extra2 = &rds_ib_active_bonding_failback_max_jiffies, /* upper bound for the range check */ + }, { .procname = "disable_unmap_fmr_cpu_assignment", .data = &rds_ib_sysctl_disable_unmap_fmr_cpu, -- 2.50.1