]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: restore the exponential back-off scheme
author Santosh Shilimkar <santosh.shilimkar@oracle.com>
Thu, 15 Sep 2016 21:50:17 +0000 (14:50 -0700)
committer Santosh Shilimkar <santosh.shilimkar@oracle.com>
Wed, 12 Oct 2016 17:20:26 +0000 (10:20 -0700)
The lower-IP and exponential back-off scheme was added to save SM
queries caused by races, but it doesn't do what it's intended to do.
The exponential back-off scheme does a good job of backing off
for races, so the code just falls back to the original scheme.

Orabug: 22347191

Tested-by: Michael Nowak <michael.nowak@oracle.com>
Tested-by: Rafael Alejandro Peralez <rafael.peralez@oracle.com>
Tested-by: Liwen Huang <liwen.huang@oracle.com>
Tested-by: Hong Liu <hong.x.liu@oracle.com>
Reviewed-by: Mukesh Kacker <mukesh.kacker@oracle.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/threads.c

index 5a55cd83ac00cf6cf975c62b27529d007d9e7122..241df31bedab3bba3e9892bfd3fa837104415478 100644 (file)
@@ -750,7 +750,6 @@ void rds_conn_drop(struct rds_connection *conn, int reason)
                conn->c_reconnect_warn = 1;
                conn->c_reconnect_drops = 0;
                conn->c_reconnect_err = 0;
-               conn->c_reconnect_racing = 0;
                printk(KERN_INFO "RDS/IB: connection "
                        "<%u.%u.%u.%u,%u.%u.%u.%u,%d> dropped due to '%s'\n",
                        NIPQUAD(conn->c_laddr),
index 7422d3cc2ed7e72d0aa30cba836ad7d5d201b768..8ea47b6af17a3d39b00ac1bc0bb6a944260e833b 100644 (file)
@@ -915,8 +915,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                } else if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
                        unsigned long now = get_seconds();
 
-                       conn->c_reconnect_racing++;
-
                        /*
                         * after 15 seconds, give up on existing connection
                         * attempts and make them try again.  At this point
index 078b99bdf2d269f1befe02315b59a4cccab57582..6e1c49d059bf33fb7cd5864a33a7e1f0f0722a71 100644 (file)
@@ -258,10 +258,6 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
        case RDMA_CM_EVENT_REJECTED:
                err = (int *)event->param.conn.private_data;
 
-               if (conn && event->status == RDS_REJ_CONSUMER_DEFINED &&
-                   *err <= 1)
-                       conn->c_reconnect_racing++;
-
                if (conn) {
                        if (event->status == RDS_REJ_CONSUMER_DEFINED &&
                            (*err) == 0) {
index 8645c10cf4dea5abcd203b1d632e690bf0f8b4dd..81e20b0252babdc9c69bedb3a44a86f032a55eb3 100644 (file)
@@ -293,7 +293,6 @@ struct rds_connection {
 
        unsigned int            c_rdsinfo_pending;
 
-       unsigned int            c_reconnect_racing;
        unsigned int            c_route_resolved;
 
        enum rds_conn_drop_src  c_drop_source;
index 04e9127a1dbb792004bc0e914c6fe432ab97697d..38b5ee2c8b980e9f02841348d897a4e4eacd8b64 100644 (file)
@@ -150,12 +150,8 @@ void rds_queue_reconnect(struct rds_connection *conn)
                rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
                conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
 
-       if (conn->c_laddr >= conn->c_faddr)
                queue_delayed_work(conn->c_wq, &conn->c_conn_w,
                                   rand % conn->c_reconnect_jiffies);
-       else
-               queue_delayed_work(conn->c_wq, &conn->c_conn_w,
-                                  msecs_to_jiffies(100));
 
        conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
                                        rds_sysctl_reconnect_max_jiffies);
@@ -297,7 +293,6 @@ void rds_reconnect_timeout(struct work_struct *work)
                        NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                        conn->c_tos);
                rds_conn_drop(conn, DR_RECONNECT_TIMEOUT);
-               conn->c_reconnect_racing = 0;
        }
 }
 
@@ -315,25 +310,7 @@ void rds_shutdown_worker(struct work_struct *work)
                                conn->c_tos,
                                conn_drop_reason_str(conn->c_drop_source));
 
-       /* if racing is detected, lower IP backs off and let the higher IP
-        * drives the reconnect (one-sided reconnect)
-        */
-       if (conn->c_laddr < conn->c_faddr && conn->c_reconnect_racing) {
-               rds_rtd(RDS_RTD_CM,
-                       "calling rds_conn_shutdown, conn %p:0 <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
-                       conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
-                       conn->c_tos);
-               rds_conn_shutdown(conn, 0);
-               queue_delayed_work(conn->c_wq, &conn->c_reconn_w,
-                                  msecs_to_jiffies(5000));
-       } else {
-               rds_rtd(RDS_RTD_CM,
-                       "calling rds_conn_shutdown, conn %p:1 <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
-                       conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
-                       conn->c_tos);
-               rds_conn_shutdown(conn, 1);
-       }
-
+       rds_conn_shutdown(conn, 1);
 }
 
 void rds_threads_exit(void)