]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
net/rds: prioritize the base connection establishment v4.1.12-105.0.20170622_2100
author Wei Lin Guay <wei.lin.guay@oracle.com>
Mon, 15 May 2017 11:52:47 +0000 (13:52 +0200)
committer Chuck Anderson <chuck.anderson@oracle.com>
Fri, 23 Jun 2017 04:30:13 +0000 (21:30 -0700)
As of today, all the TOS connections can only be established after their
base connections are up.  This is due to the fact that TOS connections rely
on their base connections to perform route resolution. Nevertheless, when
all the connections drop/reconnect (e.g., on an ADDR_CHANGE event), the TOS
connection establishment consumes CPU resources by constantly retrying
the connection establishment until their base connections are up.

Thus, this patch delays all the TOS connections if their associated base
connections are not up. By doing so, priority is given to the base
connection establishment. Consequently, the base connections can be
established faster, and subsequently so can their associated TOS connections.

Orabug: 25521901

Signed-off-by: Wei Lin Guay <wei.lin.guay@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
Reviewed-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Tested-by: Dib Chatterjee <dib.chatterjee@oracle.com>
Tested-by: Rosa Isela Lopez Romero <rosa.lopez@oracle.com>
net/rds/connection.c
net/rds/ib_cm.c
net/rds/rds.h
net/rds/threads.c

index 00d349b4491aa59d66105408a7d96e87132e2163..c31758b907110fd17a286c08e7b445fa0f818269 100644 (file)
@@ -159,7 +159,7 @@ static void __rds_conn_path_init(struct rds_connection *conn,
        INIT_DELAYED_WORK(&cp->cp_hb_w, rds_hb_worker);
        INIT_DELAYED_WORK(&cp->cp_reconn_w, rds_reconnect_timeout);
        INIT_DELAYED_WORK(&cp->cp_reject_w, rds_reject_worker);
-       INIT_WORK(&cp->cp_down_w, rds_shutdown_worker);
+       INIT_DELAYED_WORK(&cp->cp_down_w, rds_shutdown_worker);
        mutex_init(&cp->cp_cm_lock);
        cp->cp_flags = 0;
 }
@@ -421,7 +421,7 @@ void rds_conn_shutdown(struct rds_conn_path *cp, int restart)
        rcu_read_lock();
        if (!hlist_unhashed(&conn->c_hash_node) && restart) {
                rcu_read_unlock();
-               rds_queue_reconnect(cp);
+               rds_queue_reconnect(cp, DR_DEFAULT);
        } else {
                rcu_read_unlock();
        }
@@ -442,7 +442,7 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp, int shutdown)
                return;
 
        rds_conn_path_drop(cp, DR_CONN_DESTROY);
-       flush_work(&cp->cp_down_w);
+       flush_delayed_work(&cp->cp_down_w);
 
        /* now that conn down worker is flushed; there cannot be any
         * more posting of reconn timeout work. But cancel any already
@@ -852,6 +852,7 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
        unsigned long now = get_seconds();
        struct rds_connection *conn = cp->cp_conn;
 
+       unsigned long delay = 0;
        cp->cp_drop_source = reason;
        if (rds_conn_path_state(cp) == RDS_CONN_UP) {
                cp->cp_reconnect_start = now;
@@ -891,13 +892,16 @@ void rds_conn_path_drop(struct rds_conn_path *cp, int reason)
 
        atomic_set(&cp->cp_state, RDS_CONN_ERROR);
 
+       if ((conn->c_tos && reason == DR_IB_ADDR_CHANGE) ||
+           reason == DR_IB_BASE_CONN_DOWN)
+               delay = msecs_to_jiffies(100);
        rds_rtd(RDS_RTD_CM_EXT,
                "RDS/%s: queueing shutdown work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                conn->c_trans->t_type == RDS_TRANS_TCP ? "TCP" : "IB",
                conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                conn->c_tos);
 
-       queue_work(cp->cp_wq, &cp->cp_down_w);
+       queue_delayed_work(cp->cp_wq, &cp->cp_down_w, delay);
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 
@@ -918,11 +922,18 @@ void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
 
        if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
            !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags)) {
+               if (conn->c_tos == 0 ||
+                   (conn->c_tos && rds_conn_state(cp->cp_base_conn) == RDS_CONN_UP)) {
                rds_rtd(RDS_RTD_CM_EXT,
                        "queueing connect work, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
                        conn, NIPQUAD(conn->c_laddr), NIPQUAD(conn->c_faddr),
                        conn->c_tos);
                queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, 0);
+               } else
+                       rds_rtd(RDS_RTD_CM_EXT,
+                               "skip, base conn %p down, conn %p, <%u.%u.%u.%u,%u.%u.%u.%u,%d>\n",
+                               cp->cp_base_conn, conn, NIPQUAD(conn->c_laddr),
+                               NIPQUAD(conn->c_faddr), conn->c_tos);
        }
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
index 6ca5ce32e1de816044423ad8df922f5d28f2d47a..a217d978c894f5c1c9e938133c41437b464a991c 100644 (file)
@@ -1144,9 +1144,24 @@ int rds_ib_conn_path_connect(struct rds_conn_path *cp)
        struct rds_connection *conn = cp->cp_conn;
        struct rds_ib_connection *ic = conn->c_transport_data;
        struct sockaddr_in src, dest;
-       int ret;
+       int ret = 0;
 
        conn->c_route_resolved = 0;
+
+       if (conn->c_tos) {
+                       mutex_lock(&conn->c_base_conn->c_cm_lock);
+                       if (!rds_conn_transition(conn->c_base_conn, RDS_CONN_UP,
+                                                RDS_CONN_UP)) {
+                               rds_rtd(RDS_RTD_CM_EXT,
+                                       "RDS/IB: base conn %p (%p) is not up\n",
+                                       conn->c_base_conn, conn);
+                               ret = DR_IB_BASE_CONN_DOWN;
+                       }
+                       mutex_unlock(&conn->c_base_conn->c_cm_lock);
+                       if (ret)
+                               goto out;
+       }
+
        /* XXX I wonder what affect the port space has */
        /* delegate cm event handler to rdma_transport */
        ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
index b7a0227b20ba666908ff83df615ab07fb3fb56cd..61c78639958c181432f175879b4e6ac69d550c34 100644 (file)
@@ -253,7 +253,7 @@ struct rds_conn_path {
        struct delayed_work     cp_reject_w;
        struct delayed_work     cp_hb_w;
        struct delayed_work     cp_reconn_w;
-       struct work_struct      cp_down_w;
+       struct delayed_work     cp_down_w;
        struct mutex            cp_cm_lock;     /* protect cp_state & cm */
        wait_queue_head_t       cp_waitq;
 
@@ -1139,7 +1139,7 @@ void rds_threads_exit(void);
 extern struct workqueue_struct *rds_wq;
 extern struct workqueue_struct *rds_tos_wq;
 extern struct workqueue_struct *rds_local_wq;
-void rds_queue_reconnect(struct rds_conn_path *cp);
+void rds_queue_reconnect(struct rds_conn_path *cp, int reason);
 void rds_connect_worker(struct work_struct *);
 void rds_shutdown_worker(struct work_struct *);
 void rds_send_worker(struct work_struct *);
index 7a04853fd4a1b373debcd3a4e9199fc2f807f3a4..bcf240e3dea1911559b0c04fab8302ff8881f3cc 100644 (file)
@@ -132,7 +132,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
  * We should *always* start with a random backoff; otherwise a broken connection
  * will always take several iterations to be re-established.
  */
-void rds_queue_reconnect(struct rds_conn_path *cp)
+void rds_queue_reconnect(struct rds_conn_path *cp, int reason)
 {
        unsigned long delay = 0;
        unsigned long rand;
@@ -140,23 +140,30 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
        bool is_tcp = conn->c_trans->t_type == RDS_TRANS_TCP;
 
        rds_rtd(RDS_RTD_CM_EXT,
-               "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu\n", conn,
+               "conn %p for %pI4 to %pI4 tos %d reconnect jiffies %lu %s\n", conn,
                &conn->c_laddr, &conn->c_faddr, conn->c_tos,
-               cp->cp_reconnect_jiffies);
+               cp->cp_reconnect_jiffies, conn_drop_reason_str(reason));
 
        /* let peer with smaller addr initiate reconnect, to avoid duels */
        if (is_tcp && !IS_CANONICAL(conn->c_laddr, conn->c_faddr))
                return;
 
+       if (reason == DR_IB_BASE_CONN_DOWN) {
+               cp->cp_reconnect_jiffies = 0;
+               delay = msecs_to_jiffies(500);
+       }
+
        set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
        if (cp->cp_reconnect_jiffies == 0) {
                set_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
+               get_random_bytes(&rand, sizeof(rand));
                cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w, rand % conn->c_reconnect_jiffies);
+               queue_delayed_work(cp->cp_wq, &cp->cp_conn_w,
+                                  delay + (rand % cp->cp_reconnect_jiffies));
                return;
        }
 
-       clear_bit(RDS_INITIAL_RECONNECT, &conn->c_flags);
+       clear_bit(RDS_INITIAL_RECONNECT, &cp->cp_flags);
        if ((conn->c_laddr > conn->c_faddr) ||
            rds_conn_self_loopback_passive(conn))
                delay = msecs_to_jiffies(15000);
@@ -203,7 +210,8 @@ void rds_connect_worker(struct work_struct *work)
                                                     RDS_CONN_DOWN)) {
                                rds_rtd(RDS_RTD_CM_EXT,
                                        "reconnecting..., conn %p\n", conn);
-                               rds_queue_reconnect(cp);
+                               rds_queue_reconnect(cp, ret == DR_IB_BASE_CONN_DOWN ?
+                                               DR_IB_BASE_CONN_DOWN : DR_DEFAULT);
                        } else {
                                rds_conn_path_drop(cp, DR_CONN_CONNECT_FAIL);
                        }
@@ -334,7 +342,7 @@ void rds_shutdown_worker(struct work_struct *work)
 {
        struct rds_conn_path *cp = container_of(work,
                                                struct rds_conn_path,
-                                               cp_down_w);
+                                               cp_down_w.work);
        unsigned long now = get_seconds();
        bool is_tcp = cp->cp_conn->c_trans->t_type == RDS_TRANS_TCP;
        struct rds_connection *conn = cp->cp_conn;