]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
udp: correct reuseport selection with connected sockets
authorWillem de Bruijn <willemb@google.com>
Fri, 13 Sep 2019 01:16:39 +0000 (21:16 -0400)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 21 Sep 2019 05:18:11 +0000 (07:18 +0200)
[ Upstream commit acdcecc61285faed359f1a3568c32089cc3a8329 ]

UDP reuseport groups can hold a mix unconnected and connected sockets.
Ensure that connections only receive all traffic to their 4-tuple.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

New field has_conns is set without locks. No other fields in the
bitmap are modified at runtime and the field is only ever set
unconditionally, so an RMW cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
include/net/sock_reuseport.h
net/core/sock_reuseport.c
net/ipv4/datagram.c
net/ipv4/udp.c
net/ipv6/datagram.c
net/ipv6/udp.c

index 8a5f70c7cdf24b08d863b79afe5bdc7710e6d88f..5e69fba181bc6b9d84130ea85c1787907ebd33b8 100644 (file)
@@ -21,7 +21,8 @@ struct sock_reuseport {
        unsigned int            synq_overflow_ts;
        /* ID stays the same even after the size of socks[] grows. */
        unsigned int            reuseport_id;
-       bool                    bind_inany;
+       unsigned int            bind_inany:1;
+       unsigned int            has_conns:1;
        struct bpf_prog __rcu   *prog;          /* optional BPF sock selector */
        struct sock             *socks[0];      /* array of sock pointers */
 };
@@ -35,6 +36,24 @@ extern struct sock *reuseport_select_sock(struct sock *sk,
                                          struct sk_buff *skb,
                                          int hdr_len);
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
+
+static inline bool reuseport_has_conns(struct sock *sk, bool set)
+{
+       struct sock_reuseport *reuse;
+       bool ret = false;
+
+       rcu_read_lock();
+       reuse = rcu_dereference(sk->sk_reuseport_cb);
+       if (reuse) {
+               if (set)
+                       reuse->has_conns = 1;
+               ret = reuse->has_conns;
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 int reuseport_get_id(struct sock_reuseport *reuse);
 
 #endif  /* _SOCK_REUSEPORT_H */
index dc4aefdf2a084b0ec46cc14415dcfda00a9aaa3d..2f89777763ad689b298fc5488c83f7f0663508db 100644 (file)
@@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk,
 
 select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
-               if (!sk2)
-                       sk2 = reuse->socks[reciprocal_scale(hash, socks)];
+               if (!sk2) {
+                       int i, j;
+
+                       i = j = reciprocal_scale(hash, socks);
+                       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+                               i++;
+                               if (i >= reuse->num_socks)
+                                       i = 0;
+                               if (i == j)
+                                       goto out;
+                       }
+                       sk2 = reuse->socks[i];
+               }
        }
 
 out:
index 7bd29e694603a8a19d7c114c2acae847e3fdd170..9a0fe0c2fa02c9707e6fc8c02529a48e84f7d680 100644 (file)
@@ -15,6 +15,7 @@
 #include <net/sock.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/sock_reuseport.h>
 
 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        }
        inet->inet_daddr = fl4->daddr;
        inet->inet_dport = usin->sin_port;
+       reuseport_has_conns(sk, true);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
        inet->inet_id = jiffies;
index eed59c8477228a72109718a7f35ef806ba6af826..acab7738f7333e346a86a5d35b1cc5025a7c7343 100644 (file)
@@ -434,12 +434,13 @@ static struct sock *udp4_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif, exact_dif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp_ehashfn(net, daddr, hnum,
                                                   saddr, sport);
                                result = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (result)
+                               if (result && !reuseport_has_conns(sk, false))
                                        return result;
                        }
                        badness = score;
index 9d78c907b918a98cbb9e80154a038e31b6bddd11..694168e2302e37b37c4863b3a0fd964c741984e6 100644 (file)
@@ -27,6 +27,7 @@
 #include <net/ip6_route.h>
 #include <net/tcp_states.h>
 #include <net/dsfield.h>
+#include <net/sock_reuseport.h>
 
 #include <linux/errqueue.h>
 #include <linux/uaccess.h>
@@ -254,6 +255,7 @@ ipv4_connected:
                goto out;
        }
 
+       reuseport_has_conns(sk, true);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
 out:
index 70b01bd950227897d69a397c63f53a937efeaccc..1258be19e1865b67bbc1264d119c66f3bf189996 100644 (file)
@@ -168,13 +168,14 @@ static struct sock *udp6_lib_lookup2(struct net *net,
                score = compute_score(sk, net, saddr, sport,
                                      daddr, hnum, dif, sdif, exact_dif);
                if (score > badness) {
-                       if (sk->sk_reuseport) {
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
                                hash = udp6_ehashfn(net, daddr, hnum,
                                                    saddr, sport);
 
                                result = reuseport_select_sock(sk, hash, skb,
                                                        sizeof(struct udphdr));
-                               if (result)
+                               if (result && !reuseport_has_conns(sk, false))
                                        return result;
                        }
                        result = sk;