]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: TOS fixes in failure paths when RDS-TCP and RDS-RDMA are run together
authorSowmini Varadhan <sowmini.varadhan@oracle.com>
Tue, 12 Apr 2016 00:17:37 +0000 (17:17 -0700)
committerChuck Anderson <chuck.anderson@oracle.com>
Thu, 14 Apr 2016 01:42:44 +0000 (18:42 -0700)
Orabug 20930687

When errors such as connection hangs or failures are encountered
over RDS-TCP,  the sending RDS, in an attempt at HA, will try to
reconnect, and trip up on all sorts of data structures intended
for ToS support. The ToS feature is currently only supported for
RDS-IB, and  unplanned/untested usage of these data
structures by RDS-TCP causes deadlocks and panics.

Until we properly design, support, and test the ToS feature for
RDS-TCP, such paths should not be wandered into. Thus this patchset
adds defensive checks to ignore rs_tos settings in rds_sendmsg() for
TCP transports, and prevents the sending of ToS heartbeat pings

Until we properly design, support, and test the ToS feature for
RDS-TCP, such paths should not be wandered into. Thus this patchset
adds defensive checks to ignore rs_tos settings in rds_sendmsg() for
TCP transports, and prevents the sending of ToS heartbeat pings
in rds_send_hb() for TCP transport.

For reference, the deadlock that can be encountered in the
hb ping path is:

 [<ffffffff814846f4>] do_tcp_setsockopt+0x244/0x710  <-- wants sock_lock
 [<ffffffff81119ead>] ? __alloc_pages_nodemask+0x12d/0x230
 [<ffffffff8125cf4d>] ? rb_insert_color+0x9d/0x160
 [<ffffffff8101cd43>] ? native_sched_clock+0x13/0x80
 [<ffffffff8101c369>] ? sched_clock+0x9/0x10
 [<ffffffff810e25a9>] ? trace_clock_local+0x9/0x10
 [<ffffffff810e8d97>] ? rb_reserve_next_event+0x67/0x480
 [<ffffffff81292a1f>] ? __alloc_and_insert_iova_range+0x17f/0x1f0
 [<ffffffff81117f52>] ? get_page_from_freelist+0x1e2/0x550
 [<ffffffff81484c1a>] tcp_setsockopt+0x2a/0x30
 [<ffffffff8142e724>] sock_common_setsockopt+0x14/0x20
 [<ffffffffa00de5cd>] rds_tcp_xmit_prepare+0x5d/0x70 [rds_tcp]
 [<ffffffffa0749b35>] rds_send_xmit+0xe5/0x860 [rds]
 [<ffffffffa074a37d>] rds_send_hb+0xcd/0x130 [rds]
 [<ffffffffa0747a1b>] rds_recv_local+0x20b/0x330 [rds]
 [<ffffffffa074851d>] rds_recv_incoming+0x7d/0x290 [rds]
 [<ffffffff8101cd43>] ? native_sched_clock+0x13/0x80
 [<ffffffff8101c369>] ? sched_clock+0x9/0x10
 [<ffffffffa00de2c6>] rds_tcp_data_recv+0x316/0x440 [rds_tcp]
 [<ffffffff8148642a>] tcp_read_sock+0xda/0x230
 [<ffffffffa00ddfb0>] ? rds_tcp_recv+0x60/0x60 [rds_tcp]
 [<ffffffff81430fb0>] ? sock_get_timestamp+0xc0/0xc0
 [<ffffffffa00dde6d>] rds_tcp_read_sock+0x4d/0x60 [rds_tcp]
 [<ffffffffa00ddefa>] rds_tcp_data_ready+0x7a/0xd0 [rds_tcp]
 [<ffffffff8148df2e>] tcp_data_queue+0x2fe/0xac0
 [<ffffffff814916a9>] tcp_rcv_established+0x349/0x740
 [<ffffffff81499bb5>] tcp_v4_do_rcv+0x125/0x1f0
 [<ffffffff8149b3e7>] tcp_v4_rcv+0x597/0x830   <---- holds sock_lock
 [<ffffffff8148b56e>] ? __tcp_ack_snd_check+0x5e/0xa0
 [<ffffffff814916cd>] ? tcp_rcv_established+0x36d/0x740
 [<ffffffff814774ed>] ip_local_deliver_finish+0xdd/0x2a0
 [<ffffffff81477730>] ip_local_deliver+0x80/0x90
 [<ffffffff81476d75>] ip_rcv_finish+0x105/0x370

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
net/rds/af_rds.c
net/rds/send.c

index a94317dfdcfffed07ee63c59d8dd2f1db3b7d52a..9aa99921ce2984765c7c01dc5d82ad0b2a62c85c 100644 (file)
@@ -228,6 +228,10 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
                if (get_user(tos, (rds_tos_t __user *)arg))
                        return -EFAULT;
 
+               if (rs->rs_transport &&
+                   rs->rs_transport->t_type == RDS_TRANS_TCP)
+                       tos = 0;
+
                spin_lock_bh(&rds_sock_lock);
                if (rs->rs_tos || rs->rs_conn) {
                        spin_unlock_bh(&rds_sock_lock);
index 5274d4e2fd615fefda8734da47caf3444cc1fa3a..5bc6400ae3687ef4d43384320405b95fb5612185 100644 (file)
@@ -1606,6 +1606,9 @@ rds_send_hb(struct rds_connection *conn, int response)
        unsigned long flags;
        int ret = 0;
 
+       if (conn->c_trans->t_type == RDS_TRANS_TCP)
+               return 0;
+
        rm = rds_message_alloc(0, GFP_ATOMIC);
        if (!rm)
                return -ENOMEM;