www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: Ensure non-zero SL uses correct path before lane 0 connection is dropped
author: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Tue, 1 Jul 2014 08:57:55 +0000 (01:57 -0700)
committer: Mukesh Kacker <mukesh.kacker@oracle.com>
Wed, 8 Jul 2015 21:00:02 +0000 (14:00 -0700)
This fixes an issue in the following scenario:
  * A non-zero lane goes down first, with send completion error 12.
  * Before the lane 0 connection goes down, the peer initiates a connection
    request on the non-zero lane.
  * This non-zero lane connection request may use old ARP entries of lane 0.

This also fixes a race condition between connection establishment and drop in
the following scenario:
  * The non-zero lane connection is dropped.
  * The non-zero lane connection is re-initiated; this time it finds the proper
    route and the connection request goes through.
  * Before the non-zero lane connection is established at the RDS layer,
    the zero lane connection is dropped.
  * Dropping the zero lane connection now drops the non-zero lane connection as
    well (on the assumption that the non-zero lane did not find a proper route).
  * When the non-zero lane connection establishment event (REP packet) is
    received, there is a race between the connection establishment event on one
    CPU and the connection drop on another CPU.

Orabug: 19133664

Signed-off-by: Ajaykumar Hotchandani <ajaykumar.hotchandani@oracle.com>
Signed-off-by: Chien-Hua Yen <chien.yen@oracle.com>
Reviewed-by: Bang Nguyen <bang.nguyen@oracle.com>
Signed-off-by: Guangyu Sun <guangyu.sun@oracle.com>
(cherry picked from commit 47d8e78f82872bbb9d709a0743ea2bdb2e9f6cbb)

net/rds/connection.c
net/rds/ib_cm.c
net/rds/rds.h

index b29d7754e5aeb9be3dfb98345bebaaf2d8f372aa..7a53a8eaae86ec2a92469374defaba4043b59a18 100644 (file)
@@ -209,6 +209,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
        conn->c_reconnect_drops = 0;
        conn->c_reconnect_err = 0;
        conn->c_proposed_version = RDS_PROTOCOL_VERSION;
+       conn->c_route_resolved = 1;
 
        INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
        INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
@@ -595,6 +596,16 @@ static void rds_conn_probe_lanes(struct rds_connection *conn)
                        tmp->c_trans == conn->c_trans) {
                        if (rds_conn_up(tmp))
                                rds_send_hb(tmp, 0);
+                       else if (rds_conn_connecting(tmp) && (tmp->c_route_resolved == 0)) {
+                               printk(KERN_INFO "RDS/IB: connection "
+                                      "<%u.%u.%u.%u,%u.%u.%u.%u,%d> "
+                                      "connecting, force reset ",
+                                      NIPQUAD(tmp->c_laddr),
+                                      NIPQUAD(tmp->c_faddr),
+                                      tmp->c_tos);
+
+                               rds_conn_drop(tmp);
+                       }
                }
        }
        rcu_read_unlock();
index f577779e6111e93849c33de17cbaf84603eac100..4f3b52cdd95acc7d5740ef8fe967830bea6c1dc8 100644 (file)
@@ -132,6 +132,8 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
        struct ib_qp_attr qp_attr;
        int err;
 
+       if (conn->c_route_resolved == 0)
+               conn->c_route_resolved = 1;
        if (event->param.conn.private_data_len >= sizeof(*dp)) {
                dp = event->param.conn.private_data;
 
@@ -176,6 +178,13 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
                return;
        }
 
+       /* Drop connection if connection state is not CONNECTING.
+          Potentially connection drop from some other place like rds_conn_probe_lanes() */
+       if (!rds_conn_connecting(conn)) {
+               rds_conn_drop(conn);
+               return;
+       }
+
        ic->i_sl = ic->i_cm_id->route.path_rec->sl;
 
        /*
@@ -1060,6 +1069,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
        struct sockaddr_in src, dest;
        int ret;
 
+       conn->c_route_resolved = 0;
        /* XXX I wonder what affect the port space has */
        /* delegate cm event handler to rdma_transport */
        ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
index 0295ee34f354c48606a1b53c000089b9107a14fd..104a0ee06e0f7c00dd58fe81086791c74c55dbbf 100644 (file)
@@ -170,6 +170,7 @@ struct rds_connection {
        unsigned int            c_rdsinfo_pending;
 
        unsigned int            c_reconnect_racing;
+       unsigned int            c_route_resolved;
 };
 
 #define RDS_FLAG_CONG_BITMAP   0x01