This reverts commit
48c2d5f5e2580c9550db8ea4b433cf478925487e.
This commit is reverted for two reasons. Firstly, it doesn't fix the
problem it is supposed to fix. Secondly, it may, in some special
circumstances, create a long-lasting connection reject scenario.
As to the first reason, consider the following scenario during
fail-back. Let's say node A fails back first. It sends a DREQ. Node B
drops the IB connection. Now, both will attempt to connect and both
will perform route resolution. Since both nodes attempt to connect at
the same time, you get a race, and then the node with the lower IP
address will connect. It does so and succeeds, because both ends have
done route resolution. Traffic continues to flow. Then, node B fails
back and the
connection is torn down again. This is exactly what commit
48c2d5f5e258 ("net/rds: prevent RDS connections using stale ARP
entries") said it would prevent.
As to the second reason, the following is an excerpt from the kernel
trace buffer (slightly edited for brevity):
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.216.253 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 2
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 2
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.216.253 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001887599 tos 0
rds_ib_cm_handle_connect: 1033: saddr ::ffff:192.168.217.20 daddr ::ffff:192.168.216.252 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 4
rds_ib_cm_handle_connect: 1077: no route resolution saddr 0.0.0.0 daddr 0.0.0.0 RDSv4.1 lguid 0x10e00001888efa fguid 0x10e00001778a52 tos 4
Signed-off-by: Brian Maly <brian.maly@oracle.com>
Conflicts:
* net/rds/ib_cm.c
* net/rds/rdma_transport.c
The conflicts were caused by ftrace points that had been
IPv6-ified.
Orabug:
29391909
Signed-off-by: Håkon Bugge <haakon.bugge@oracle.com>
Tested-by: Rosa Lopez <rosa.lopez@oracle.com>
Reviewed-by: Dag Moxnes <dag.moxnes@oracle.com>
Signed-off-by: Brian Maly <brian.maly@oracle.com>
mutex_lock(&conn->c_cm_lock);
ic = conn->c_transport_data;
- if (conn->c_route && !rds_conn_self_loopback_passive(conn)) {
- rds_rtd_ptr(RDS_RTD_CM,
- "no route resolution cm_id %p conn %p <%pI6c,%pI6c,%d> RDSv%u.%u lguid 0x%llx fguid 0x%llx\n",
- cm_id, conn,
- saddr6, daddr6, dp_cmn->ricpc_tos,
- RDS_PROTOCOL_MAJOR(version),
- RDS_PROTOCOL_MINOR(version),
- (unsigned long long)be64_to_cpu(lguid),
- (unsigned long long)be64_to_cpu(fguid));
- goto out;
- }
-
if (ic && cm_seq_check_enable) {
if (cm_req_seq != ic->i_prev_seq) {
rds_rtd(RDS_RTD_CM_EXT_P,
* needs to update the sl manually. As for now, RDS is assuming
* that it is a 1:1 in tos to sl mapping.
*/
- rds_rtd(RDS_RTD_CM, "ibic: %p cm_id: %p\n", ibic->i_cm_id->context, cm_id->context);
- conn->c_route = 0;
cm_id->route.path_rec[0].sl = TOS_TO_SL(conn->c_tos);
cm_id->route.path_rec[0].qos_class = conn->c_tos;
rds_rtd_ptr(RDS_RTD_CM,
"ADDR_CHANGE: calling rds_conn_drop conn %p <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
conn->c_reconnect_racing = 0;
- /* reset route resolution flag */
- conn->c_route = 1;
if (!rds_conn_self_loopback_passive(conn))
rds_conn_drop(conn, DR_IB_ADDR_CHANGE);
}
"DISCONNECT event - dropping conn %p <%pI6c,%pI6c,%d>\n",
conn, &conn->c_laddr, &conn->c_faddr, conn->c_tos);
conn->c_reconnect_racing = 0;
- /* reset route resolution flag */
- conn->c_route = 1;
if (!rds_conn_self_loopback_passive(conn))
rds_conn_drop(conn, DR_IB_DISCONNECTED_EVENT);
break;
unsigned int cp_rdsinfo_pending;
unsigned int cp_reconnect_racing;
- unsigned int cp_route_resolved;
enum rds_conn_drop_src cp_drop_source;
unsigned char cp_acl_init;
int c_to_index;
unsigned int c_reconnect;
+
/* Qos support */
u8 c_tos;
rds_update_avg_connect_time(cp);
cp->cp_connection_start = get_seconds();
cp->cp_reconnect = 1;
- /* reset route resolution flag */
- cp->cp_route_resolved = 0;
conn->c_proposed_version = RDS_PROTOCOL_VERSION;
}
EXPORT_SYMBOL_GPL(rds_connect_path_complete);