www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
Revert "net/rds: prevent RDS connections using stale ARP entries"
author Håkon Bugge <haakon.bugge@oracle.com>
Thu, 7 Mar 2019 12:59:40 +0000 (13:59 +0100)
committer Brian Maly <brian.maly@oracle.com>
Tue, 21 May 2019 22:01:01 +0000 (18:01 -0400)
This reverts commit 48c2d5f5e2580c9550db8ea4b433cf478925487e.

This commit is reverted for two reasons. Firstly, it doesn't fix the
problem it is supposed to fix. Secondly, it may, in some special
circumstances, create a long-lasting connection reject scenario.

As to the first reason, consider the following scenario during
fail-back. Let's say node A fails back first. It sends a DREQ. Node B
drops the IB connection. Now, both will attempt to connect and both
will perform route resolution. Since both nodes attempt to connect at
the same time, you get a race, and then the lower IP will connect. It
does so and succeeds, because both ends have done route
resolution. Traffic continues to flow. Then, node B fails back and the
connection is torn down again. This is exactly what commit
48c2d5f5e258 ("net/rds: prevent RDS connections using stale ARP
entries") said it would prevent.

As to the second reason, the following is an excerpt from the kernel
trace buffer (slightly edited for brevity):

rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.216.253 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 2
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 2
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.216.253 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 4
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 4

Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
   * net/rds/ib_cm.c
   * net/rds/rdma_transport.c

The conflicts were caused by ftrace points that had been
IPv6-ified.

Orabug: 29391909

Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Tested-by: Rosa Lopez <rosa.lopez@oracle.com>
Reviewed-by: Dag Moxnes <dag.moxnes@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
net/rds/ib_cm.c
net/rds/rdma_transport.c
net/rds/rds.h
net/rds/threads.c

index 17713db11cab2eb5e3f4274a272ea745a0be6e08..4171249a288cfbaffdc5765828b4692528bd451a 100644 (file)
@@ -1073,18 +1073,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
        mutex_lock(&conn->c_cm_lock);
        ic = conn->c_transport_data;
 
-       if (conn->c_route && !rds_conn_self_loopback_passive(conn)) {
-               rds_rtd_ptr(RDS_RTD_CM,
-                           "no route resolution cm_id %p conn %p <%pI6c,%pI6c,%d> RDSv%u.%u lguid 0x%llx fguid 0x%llx\n",
-                           cm_id, conn,
-                           saddr6, daddr6, dp_cmn->ricpc_tos,
-                           RDS_PROTOCOL_MAJOR(version),
-                           RDS_PROTOCOL_MINOR(version),
-                           (unsigned long long)be64_to_cpu(lguid),
-                           (unsigned long long)be64_to_cpu(fguid));
-               goto out;
-       }
-
        if (ic && cm_seq_check_enable) {
                if (cm_req_seq != ic->i_prev_seq) {
                        rds_rtd(RDS_RTD_CM_EXT_P,
index 259e9ece958dcd10e208398c418dec49eb37e82d..a58d9d61ec7977423610d916daefec2b75e70d7f 100644 (file)
@@ -191,8 +191,6 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                 * needs to update the sl manually. As for now, RDS is assuming
                                 * that it is a 1:1 in tos to sl mapping.
                                 */
-                               rds_rtd(RDS_RTD_CM, "ibic: %p cm_id: %p\n", ibic->i_cm_id->context, cm_id->context);
-                               conn->c_route = 0;
                                cm_id->route.path_rec[0].sl = TOS_TO_SL(conn->c_tos);
                                cm_id->route.path_rec[0].qos_class = conn->c_tos;
                                rds_rtd_ptr(RDS_RTD_CM,
@@ -315,8 +313,6 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                                    "ADDR_CHANGE: calling rds_conn_drop conn %p <%pI6c,%pI6c,%d>\n",
                                    conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
                        conn->c_reconnect_racing = 0;
-                       /* reset route resolution flag */
-                       conn->c_route = 1;
                        if (!rds_conn_self_loopback_passive(conn))
                                rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
                }
@@ -329,8 +325,6 @@ int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id,
                            "DISCONNECT event - dropping conn %p <%pI6c,%pI6c,%d>\n",
                            conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
                conn->c_reconnect_racing = 0;
-               /* reset route resolution flag */
-               conn->c_route = 1;
                if (!rds_conn_self_loopback_passive(conn))
                        rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
                break;
index 049ee38b44430333f1761a2e01beba6962891f84..f8b468af858942fb71f5abafa960b84d3357c120 100644 (file)
@@ -303,7 +303,6 @@ struct rds_conn_path {
        unsigned int            cp_rdsinfo_pending;
 
        unsigned int            cp_reconnect_racing;
-       unsigned int            cp_route_resolved;
        enum rds_conn_drop_src  cp_drop_source;
 
        unsigned char           cp_acl_init;
@@ -355,6 +354,7 @@ struct rds_connection {
        int                     c_to_index;
 
        unsigned int            c_reconnect;
+
        /* Qos support */
        u8                      c_tos;
 
index 8d2ceaafb699499b17403b2bd08c13b2f3cc6a52..0c3f585c5d1a7b630024bceec4411e43a9d1af77 100644 (file)
@@ -131,8 +131,6 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
        rds_update_avg_connect_time(cp);
        cp->cp_connection_start = get_seconds();
        cp->cp_reconnect = 1;
-       /* reset route resolution flag */
-       cp->cp_route_resolved = 0;
        conn->c_proposed_version = RDS_PROTOCOL_VERSION;
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);