www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
RDS-TCP: Make RDS-TCP work correctly when it is set up in a netns other than init_net
author Sowmini Varadhan <sowmini.varadhan@oracle.com>
Fri, 28 Aug 2015 11:16:01 +0000 (07:16 -0400)
committer Mukesh Kacker <mukesh.kacker@oracle.com>
Fri, 4 Sep 2015 02:07:48 +0000 (19:07 -0700)
Open the sockets by calling sock_create_kern() with the correct struct net
pointer, and use that same struct net pointer when verifying the
address passed to rds_bind().

Backport of upstream commit: d5a8ac28a7ff2f250d1bedbb6008dd2f6f6f1638

Orabug: 21437445

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
12 files changed:
net/rds/bind.c
net/rds/connection.c
net/rds/ib.c
net/rds/ib_cm.c
net/rds/iw.c
net/rds/iw_cm.c
net/rds/rds.h
net/rds/send.c
net/rds/tcp.c
net/rds/tcp_connect.c
net/rds/tcp_listen.c
net/rds/transport.c

index 9510dc64691b4759d11426ec0a170b25306e7fc8..7c14afe1b394e1e3aa9c47c2b33f8d66547bd33f 100644 (file)
@@ -201,7 +201,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
                ret = 0;
                goto out;
        }
-       trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+       trans = rds_trans_get_preferred(sock_net(sock->sk),
+                                       sin->sin_addr.s_addr);
        if (!trans) {
                ret = -EADDRNOTAVAIL;
                rds_remove_bound(rs);
index 1d118e053725a1d36d2e70e1634ab997f311341c..0dd0cef5ca8d64851769a35343cea6224d7bb9a1 100644 (file)
@@ -118,7 +118,8 @@ void rds_conn_reset(struct rds_connection *conn)
  * For now they are not garbage collected once they're created.  They
  * are torn down as the module is removed, if ever.
  */
-static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
+static struct rds_connection *__rds_conn_create(struct net *net,
+                                               __be32 laddr, __be32 faddr,
                                       struct rds_transport *trans, gfp_t gfp,
                                       u8 tos,
                                       int is_outgoing)
@@ -164,6 +165,7 @@ new_conn:
        conn->c_faddr = faddr;
        spin_lock_init(&conn->c_lock);
        conn->c_next_tx_seq = 1;
+       rds_conn_net_set(conn, net);
 
        init_waitqueue_head(&conn->c_waitq);
        INIT_LIST_HEAD(&conn->c_send_queue);
@@ -183,7 +185,7 @@ new_conn:
         * can bind to the destination address then we'd rather the messages
         * flow through loopback rather than either transport.
         */
-       loop_trans = rds_trans_get_preferred(faddr);
+       loop_trans = rds_trans_get_preferred(net, faddr);
        if (loop_trans) {
                rds_trans_put(loop_trans);
                conn->c_loopback = 1;
@@ -279,19 +281,21 @@ out:
        return conn;
 }
 
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+                                      __be32 laddr, __be32 faddr,
                                        struct rds_transport *trans,
                                        u8 tos, gfp_t gfp)
 {
-       return __rds_conn_create(laddr, faddr, trans, gfp, tos, 0);
+       return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 0);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create);
 
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+                                               __be32 laddr, __be32 faddr,
                                        struct rds_transport *trans,
                                        u8 tos, gfp_t gfp)
 {
-       return __rds_conn_create(laddr, faddr, trans, gfp, tos, 1);
+       return __rds_conn_create(net, laddr, faddr, trans, gfp, tos, 1);
 }
 EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
 
index 3034308c69c3a85a6b793cf9f71e3fc3ed432636..7bdae390143799429d2cd4edadafc71319452d6b 100644 (file)
@@ -403,7 +403,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
  * allowed to influence which paths have priority.  We could call userspace
  * asserting this policy "routing".
  */
-static int rds_ib_laddr_check(__be32 addr)
+static int rds_ib_laddr_check(struct net *net, __be32 addr)
 {
        int ret;
        struct rdma_cm_id *cm_id;
index 4f3b52cdd95acc7d5740ef8fe967830bea6c1dc8..a1bbf0467cd90fd8f05ad0e601824a2ae1e8b596 100644 (file)
@@ -792,8 +792,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
                 (unsigned long long)be64_to_cpu(lguid),
                 (unsigned long long)be64_to_cpu(fguid));
 
-       conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
-                              dp->dp_tos, GFP_KERNEL);
+       /* RDS/IB is not currently netns aware, thus init_net */
+       conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+                              &rds_ib_transport, dp->dp_tos, GFP_KERNEL);
        if (IS_ERR(conn)) {
                rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
                conn = NULL;
@@ -801,7 +802,8 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
        }
 
        if (dp->dp_tos && !conn->c_base_conn) {
-               conn->c_base_conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr,
+               conn->c_base_conn = rds_conn_create(&init_net,
+                                       dp->dp_daddr, dp->dp_saddr,
                                        &rds_ib_transport, 0, GFP_KERNEL);
                if (IS_ERR(conn->c_base_conn)) {
                        conn = NULL;
index d9451aa1d9e47c2407824305d359cacb80aa9691..2e57057a1434f06d3f36e2927206139feb9101fe 100644 (file)
@@ -216,7 +216,7 @@ static void rds_iw_ic_info(struct socket *sock, unsigned int len,
  * allowed to influence which paths have priority.  We could call userspace
  * asserting this policy "routing".
  */
-static int rds_iw_laddr_check(__be32 addr)
+static int rds_iw_laddr_check(struct net *net, __be32 addr)
 {
        int ret;
        struct rdma_cm_id *cm_id;
index 5be52ba4f8359c696c660fe2ed849a31ef926ae2..edcc5858fbbeea304749d17547523b4f94df42fa 100644 (file)
@@ -395,8 +395,9 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
                 &dp->dp_saddr, &dp->dp_daddr,
                 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
 
-       conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
-                              0, GFP_KERNEL);
+       /* RDS/IW is not currently netns aware, thus init_net */
+       conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+                              &rds_iw_transport, 0, GFP_KERNEL);
        if (IS_ERR(conn)) {
                rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
                conn = NULL;
index a80dfdb3ee4b7000fe23bff0e08db4897b6855b3..be8306c35b3748ab841bf43d316fa284171f627e 100644 (file)
@@ -146,6 +146,7 @@ struct rds_connection {
        /* Protocol version */
        unsigned int            c_proposed_version;
        unsigned int            c_version;
+       possible_net_t          c_net;
 
        /* Re-connect stall diagnostics */
        unsigned long           c_reconnect_start;
@@ -173,6 +174,18 @@ struct rds_connection {
        unsigned int            c_route_resolved;
 };
 
+static inline
+struct net *rds_conn_net(struct rds_connection *conn)
+{
+       return read_pnet(&conn->c_net);
+}
+
+static inline
+void rds_conn_net_set(struct rds_connection *conn, struct net *net)
+{
+       write_pnet(&conn->c_net, net);
+}
+
 #define RDS_FLAG_CONG_BITMAP           0x01
 #define RDS_FLAG_ACK_REQUIRED          0x02
 #define RDS_FLAG_RETRANSMITTED         0x04
@@ -476,7 +489,7 @@ struct rds_transport {
        unsigned int            t_prefer_loopback:1;
        unsigned int            t_type;
 
-       int (*laddr_check)(__be32 addr);
+       int (*laddr_check)(struct net *net, __be32 addr);
        int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
        void (*conn_free)(void *data);
        int (*conn_connect)(struct rds_connection *conn);
@@ -683,10 +696,12 @@ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
 /* conn.c */
 int rds_conn_init(void);
 void rds_conn_exit(void);
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+                                      __be32 laddr, __be32 faddr,
                                        struct rds_transport *trans,
                                        u8 tos, gfp_t gfp);
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+                                               __be32 laddr, __be32 faddr,
                                struct rds_transport *trans,
                                u8 tos, gfp_t gfp);
 struct rds_connection *rds_conn_find(__be32 laddr, __be32 faddr,
@@ -892,7 +907,7 @@ void rds_connect_complete(struct rds_connection *conn);
 /* transport.c */
 int rds_trans_register(struct rds_transport *trans);
 void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(__be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
 void rds_trans_put(struct rds_transport *trans);
 unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
                                       unsigned int avail);
index 3174eb02c166e3310b816823016c90845adbacbc..cc4835a81afed3908c5a01787e48680ec326cb4f 100644 (file)
@@ -1240,7 +1240,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
                        rs->rs_tos == rs->rs_conn->c_tos)
                conn = rs->rs_conn;
        else {
-               conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr,
+               conn = rds_conn_create_outgoing(sock_net(sock->sk),
+                                               rs->rs_bound_addr, daddr,
                                        rs->rs_transport, rs->rs_tos,
                                        sock->sk->sk_allocation);
                if (IS_ERR(conn)) {
@@ -1250,6 +1251,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 
                if (rs->rs_tos && !conn->c_base_conn) {
                        conn->c_base_conn = rds_conn_create_outgoing(
+                                       sock_net(sock->sk),
                                        rs->rs_bound_addr, daddr,
                                        rs->rs_transport, 0,
                                        sock->sk->sk_allocation);
index a3dcc1e381a66ad4b4b44ef67fd5fa049e0c9463..07918d42c0f5b9c406c67a377c4b215b29b1b28e 100644 (file)
@@ -187,9 +187,9 @@ out:
        spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
 }
 
-static int rds_tcp_laddr_check(__be32 addr)
+static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 {
-       if (inet_addr_type(&init_net, addr) == RTN_LOCAL)
+       if (inet_addr_type(net, addr) == RTN_LOCAL)
                return 0;
        return -EADDRNOTAVAIL;
 }
index 62ac2bbf686e58b0d81df9db38d3a436533ac4a5..79bf36f79e3f880fc0076292e988f79f31575bd4 100644 (file)
@@ -79,7 +79,8 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
        struct sockaddr_in src, dest;
        int ret;
 
-       ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+       ret = sock_create_kern(rds_conn_net(conn), PF_INET, SOCK_STREAM,
+                              IPPROTO_TCP, &sock);
        if (ret < 0)
                goto out;
 
index 3d7df924c4ef2d413744f0c908f0bd9fe4cf356a..41abc08c9b2c607dbcf0b30ec05b112fff7a01c2 100644 (file)
@@ -84,8 +84,9 @@ static int rds_tcp_accept_one(struct socket *sock)
        struct inet_sock *inet;
        struct rds_tcp_connection *rs_tcp;
 
-       ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
-                              sock->sk->sk_protocol, &new_sock);
+       ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
+                              sock->sk->sk_type, sock->sk->sk_protocol,
+                              &new_sock);
        if (ret)
                goto out;
 
@@ -107,8 +108,9 @@ static int rds_tcp_accept_one(struct socket *sock)
                  NIPQUAD(inet->inet_saddr), ntohs(inet->inet_sport),
                  NIPQUAD(inet->inet_daddr), ntohs(inet->inet_dport));
 
-       conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr, &rds_tcp_transport,
-                              0, GFP_KERNEL);
+       conn = rds_conn_create(sock_net(sock->sk),
+                              inet->inet_saddr, inet->inet_daddr,
+                              &rds_tcp_transport, 0, GFP_KERNEL);
        if (IS_ERR(conn)) {
                ret = PTR_ERR(conn);
                goto out;
@@ -186,7 +188,13 @@ int rds_tcp_listen_init(void)
        struct socket *sock = NULL;
        int ret;
 
-       ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+       /* MUST call sock_create_kern directly so that we avoid get_net()
+        * in sk_alloc(). Doing a get_net() will result in cleanup_net()
+        * never getting invoked, which will leave sock and other things
+        * in limbo.
+        */
+       ret = sock_create_kern(current->nsproxy->net_ns, PF_INET, SOCK_STREAM,
+                              IPPROTO_TCP, &sock);
        if (ret < 0)
                goto out;
 
index e3a4811d8245550e71627a27152c79cda08d9152..d2bb778bd55aba0daf99735f8f703ee0d984b0c5 100644 (file)
@@ -77,7 +77,7 @@ void rds_trans_put(struct rds_transport *trans)
                module_put(trans->t_owner);
 }
 
-struct rds_transport *rds_trans_get_preferred(__be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
 {
        struct rds_transport *ret = NULL;
        struct rds_transport *trans;
@@ -90,7 +90,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
        for (i = 0; i < RDS_TRANS_COUNT; i++) {
                trans = transports[i];
 
-               if (trans && (trans->laddr_check(addr) == 0) &&
+               if (trans && (trans->laddr_check(net, addr) == 0) &&
                    (!trans->t_owner || try_module_get(trans->t_owner))) {
                        ret = trans;
                        break;