www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: Idle QoS connections during remote peer reboot causing application brownout
author Chien-Hua Yen <chien.yen@oracle.com>
Tue, 18 Mar 2014 21:46:49 +0000 (14:46 -0700)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 20:59:51 +0000 (13:59 -0700)
This fix addresses idle QoS connections not getting a disconnect event
when the remote peer reboots. That causes a delayed reconnect, and hence
an application brownout, when the peer comes back online. The fix is to
proactively drop and reconnect them when the base lane is going through
the reconnect to the rebooted peer, in effect forcing all the lanes to go
through the reconnect at the same time.

Orabug: 18443194

Signed-off-by: Bang Nguyen <bang.nguyen@oracle.com>
Signed-off-by: Chien-Hua Yen <chien.yen@oracle.com>
(cherry picked from commit f51ccefb3a0b9485da5cc5f66bb1e311f61bd70b)

net/rds/connection.c
net/rds/ib.c

index e5337aef1c8d449812c2c494e762b85584ffe1b3..3067c6be7bf0d16c62bd0b04f9fc9779ffd9d9fa 100644 (file)
@@ -580,6 +580,32 @@ void rds_conn_exit(void)
                                 rds_conn_message_info_retrans);
 }
 
+/*
+ * Drop every QoS lane (c_tos != 0) that shares conn's local address,
+ * peer address and transport.  An idle QoS lane may not get a disconnect
+ * event when the remote peer reboots, which delays its reconnect and
+ * causes an application brownout once the peer is back online.  Called
+ * from rds_conn_drop() for the base lane (c_tos == 0), so that all lanes
+ * to the rebooted peer go through the reconnect at the same time.
+ */
+static void rds_conn_shutdown_lanes(struct rds_connection *conn)
+{
+       struct hlist_head *head =
+               rds_conn_bucket(conn->c_laddr, conn->c_faddr);
+       struct rds_connection *tmp;
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(tmp, head, c_hash_node) {
+               if (tmp->c_faddr == conn->c_faddr &&
+                       tmp->c_laddr == conn->c_laddr &&
+                       tmp->c_tos != 0 &&
+                       tmp->c_trans == conn->c_trans) {
+                               rds_conn_drop(tmp);
+               }
+       }
+       rcu_read_unlock();
+}
+
 /*
  * Force a disconnect
  */
@@ -608,6 +634,10 @@ void rds_conn_drop(struct rds_connection *conn)
                        conn->c_reconnect_drops,
                        conn->c_reconnect_err);
                conn->c_reconnect_warn = 0;
+
+               /* see comment for rds_conn_shutdown_lanes() */
+               if (conn->c_tos == 0)
+                       rds_conn_shutdown_lanes(conn);
        }
        conn->c_reconnect_drops++;
 
index 442cbbb55b1dd46a35f559041bf9e8cdef642657..948a3b3c036352d95bbfceec3d9a1ce837b06437 100644 (file)
@@ -772,7 +772,7 @@ static int rds_ib_move_ip(char                      *from_dev,
 
                                        work->conn = (struct rds_ib_connection *)ic->conn;
                                        INIT_DELAYED_WORK(&work->work, rds_ib_conn_drop);
-                                       queue_delayed_work(rds_wq, &work->work,
+                                       queue_delayed_work(rds_aux_wq, &work->work,
                                                msecs_to_jiffies(1000 * rds_ib_active_bonding_reconnect_delay));
                                } else
                                        rds_conn_drop(ic->conn);