]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()
authorSowmini Varadhan <sowmini.varadhan@oracle.com>
Tue, 7 Jun 2016 18:48:43 +0000 (11:48 -0700)
committerSantosh Shilimkar <santosh.shilimkar@oracle.com>
Wed, 10 Aug 2016 23:04:31 +0000 (16:04 -0700)
Orabug: 23542064

Backport of upstream commit 9c79440e2c5e ("RDS: TCP: fix race windows
in send-path quiescence by rds_tcp_accept_one()")

The send path needs to be quiesced before resetting callbacks from
rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize
rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves
this using the c_state and RDS_IN_XMIT bit following the pattern
used by rds_conn_shutdown(). However this leaves the possibility
of a race window as shown in the sequence below
        take t_conn_lock in rds_tcp_conn_connect
        send outgoing syn to peer
        drop t_conn_lock in rds_tcp_conn_connect
        incoming from peer triggers rds_tcp_accept_one, conn is
     marked CONNECTING
        wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads
        call rds_tcp_reset_callbacks
        [.. race-window where incoming syn-ack can cause the conn
     to be marked UP from rds_tcp_state_change ..]
        lock_sock called from rds_tcp_reset_callbacks, and we set
     t_sock to null
As soon as the conn is marked UP in the race-window above, rds_send_xmit()
threads will proceed to rds_tcp_xmit and may encounter a null-pointer
deref on the t_sock.

Given that rds_tcp_state_change() is invoked in softirq context, whereas
rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT
after lock_sock could result in a deadlock with tcp_sendmsg, this
commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which
will prevent a transition to RDS_CONN_UP from rds_tcp_state_change().

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/rds/rds.h
net/rds/tcp.c
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rds/threads.c

index 9d13cf9f89b9b163bb97e94375e101d7ea7fb647..d3ea4292f4dd98b2ba3ae5131221a7cb3f8454f4 100644 (file)
@@ -126,6 +126,7 @@ enum {
        RDS_CONN_CONNECTING,
        RDS_CONN_DISCONNECTING,
        RDS_CONN_UP,
+       RDS_CONN_RESETTING,
        RDS_CONN_ERROR,
 };
 
@@ -1047,6 +1048,7 @@ void rds_reject_worker(struct work_struct *);
 void rds_recv_worker(struct work_struct *);
 void rds_hb_worker(struct work_struct *);
 void rds_reconnect_timeout(struct work_struct *);
+void rds_connect_path_complete(struct rds_connection *conn, int curr);
 void rds_connect_complete(struct rds_connection *conn);
 
 /* transport.c */
index 88ce595b0580daae5d376e8d2be9b63986f0b5e1..dff1c177e5bf81628513188de37bf806d23ad338 100644 (file)
@@ -147,11 +147,23 @@ void rds_tcp_reset_callbacks(struct socket *sock,
         * potentially have transitioned to the RDS_CONN_UP state,
         * so we must quiesce any send threads before resetting
         * c_transport_data. We quiesce these threads by setting
-        * cp_state to something other than RDS_CONN_UP, and then
+        * c_state to something other than RDS_CONN_UP, and then
         * waiting for any existing threads in rds_send_xmit to
         * complete release_in_xmit(). (Subsequent threads entering
         * rds_send_xmit() will bail on !rds_conn_up().
+        *
+        * However an incoming syn-ack at this point would end up
+        * marking the conn as RDS_CONN_UP, and would again permit
+        * rds_send_xmi() threads through, so ideally we would
+        * synchronize on RDS_CONN_UP after lock_sock(), but cannot
+        * do that: waiting on !RDS_IN_XMIT after lock_sock() may
+        * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT
+        * would not get set. As a result, we set c_state to
+        * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change
+        * cannot mark rds_conn_path_up() in the window before lock_sock()
         */
+       atomic_set(&conn->c_state, RDS_CONN_RESETTING);
+       wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags));
        lock_sock(osock->sk);
        /* reset receive side state for rds_tcp_data_recv() for osock  */
        if (tc->t_tinc) {
index ecf289cf6fd77d689e17a601af245b5c29966bb3..41a83d90f0e1c4168cac4d3306f1c5e4ffec552f 100644 (file)
@@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk)
                case TCP_SYN_RECV:
                        break;
                case TCP_ESTABLISHED:
-                       rds_connect_complete(conn);
+                       rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
                        break;
                case TCP_CLOSE_WAIT:
                case TCP_CLOSE:
index 2020c40a11e6d07e5a9b106d1f39d073c1fa6b18..c3cc2a31b013ac2ff7bacdbc78bd481391eebf91 100644 (file)
@@ -134,16 +134,15 @@ int rds_tcp_accept_one(struct socket *sock)
                    !conn->c_outgoing) {
                        goto rst_nsk;
                } else {
-                       atomic_set(&conn->c_state, RDS_CONN_CONNECTING);
-                       wait_event(conn->c_waitq,
-                                  !test_bit(RDS_IN_XMIT, &conn->c_flags));
                        rds_tcp_reset_callbacks(new_sock, conn);
                        conn->c_outgoing = 0;
+                       /* rds_connect_path_complete() marks RDS_CONN_UP */
+                       rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING);
                }
        } else {
                rds_tcp_set_callbacks(new_sock, conn);
+               rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
        }
-       rds_connect_complete(conn); /* marks RDS_CONN_UP */
        new_sock = NULL;
        ret = 0;
        goto out;
index d1c364b2864beb28c39c52aa18d82942e2f27a23..e934ec8d19ec608b84aa9d701738f3d0f2a7735d 100644 (file)
@@ -77,9 +77,9 @@ EXPORT_SYMBOL_GPL(rds_wq);
 struct workqueue_struct *rds_local_wq;
 EXPORT_SYMBOL_GPL(rds_local_wq);
 
-void rds_connect_complete(struct rds_connection *conn)
+void rds_connect_path_complete(struct rds_connection *conn, int curr)
 {
-       if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
+       if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) {
                printk(KERN_WARNING "%s: Cannot transition to state UP"
                                ", current state is %d\n",
                                __func__,
@@ -107,6 +107,12 @@ void rds_connect_complete(struct rds_connection *conn)
        conn->c_proposed_version = RDS_PROTOCOL_VERSION;
        conn->c_route_to_base = 0;
 }
+EXPORT_SYMBOL_GPL(rds_connect_path_complete);
+
+void rds_connect_complete(struct rds_connection *conn)
+{
+       rds_connect_path_complete(conn, RDS_CONN_CONNECTING);
+}
 EXPORT_SYMBOL_GPL(rds_connect_complete);
 
 /*