udp: Handle ICMP errors for tunnels with same destination port on both endpoints

author Stefano Brivio <sbrivio@redhat.com>

Thu, 8 Nov 2018 11:19:14 +0000 (12:19 +0100)

committer David S. Miller <davem@davemloft.net>

Fri, 9 Nov 2018 01:13:08 +0000 (17:13 -0800)
author Stefano Brivio <sbrivio@redhat.com>
Thu, 8 Nov 2018 11:19:14 +0000 (12:19 +0100)
committer David S. Miller <davem@davemloft.net>
Fri, 9 Nov 2018 01:13:08 +0000 (17:13 -0800)
diff --git a/include/linux/udp.h b/include/linux/udp.h

index 0a9c54e76305abc4241fb2fc769e58c48ecef61e..2725c83395bfdd9e0ca0b78a1859bb6203af9f1c 100644 (file)
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -77,6 +77,7 @@ struct udp_sock {
          * For encapsulation sockets.
          */
         int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
+       int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
         void (*encap_destroy)(struct sock *sk);
  
         /* GRO functions for UDP socket */
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h

index 3fbe56430e3b381749fe97f006e2b66332e18447..dc8d804af3b4d283f88d0d68b7a8c5410cdaac16 100644 (file)
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -64,6 +64,8 @@ static inline int udp_sock_create(struct net *net,
  }
  
  typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
+typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
+                                            struct sk_buff *skb);
  typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
  typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
                                                     struct list_head *head,
@@ -76,6 +78,7 @@ struct udp_tunnel_sock_cfg {
         /* Used for setting up udp_sock fields, see udp.h for details */
         __u8  encap_type;
         udp_tunnel_encap_rcv_t encap_rcv;
+       udp_tunnel_encap_err_lookup_t encap_err_lookup;
         udp_tunnel_encap_destroy_t encap_destroy;
         udp_tunnel_gro_receive_t gro_receive;
         udp_tunnel_gro_complete_t gro_complete;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c

index 3488650b90ac3a97467fef6c64a3d42f73887568..ce759b61f6cde206fe060bc1ef41929cbf663506 100644 (file)
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -583,6 +583,62 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
         return true;
  }
  
+DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); /* tested by __udp4_lib_err() before it tries the tunnel lookup */
+void udp_encap_enable(void) /* switches udp_encap_needed_key on */
+{
+       static_branch_enable(&udp_encap_needed_key);
+}
+EXPORT_SYMBOL(udp_encap_enable);
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints, in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * @net:      network namespace handed to the socket lookup
+ * @iph:      IPv4 header supplying the addresses for the lookup; its ihl field
+ *            also gives the offset used for the temporary transport header
+ * @uh:       UDP header supplying the ports for the lookup
+ * @udptable: UDP socket table to search
+ * @skb:      buffer carrying the ICMP error; passed to the tunnel's
+ *            encap_err_lookup handler
+ *
+ * While the handler runs, the skb network header is reset and the transport
+ * header is set to iph->ihl << 2. Both offsets are saved beforehand and
+ * restored before returning, on the match and the no-match path alike.
+ *
+ * NOTE(review): the lookup passes a literal 0 in the argument slot where
+ * __udp4_lib_err() passes inet_sdif(skb) - confirm this is intended.
+ *
+ * Return the socket if we have a match. NULL is returned when the lookup finds
+ * no socket, when the socket has no encap_err_lookup handler, or when that
+ * handler returns non-zero.
+ */
+static struct sock *__udp4_lib_err_encap(struct net *net,
+                                        const struct iphdr *iph,
+                                        struct udphdr *uh,
+                                        struct udp_table *udptable,
+                                        struct sk_buff *skb)
+{
+       int (*lookup)(struct sock *sk, struct sk_buff *skb);
+       int network_offset, transport_offset;
+       struct udp_sock *up;
+       struct sock *sk;
+
+       sk = __udp4_lib_lookup(net, iph->daddr, uh->source,
+                              iph->saddr, uh->dest, skb->dev->ifindex, 0,
+                              udptable, NULL);
+       if (!sk)
+               return NULL;
+
+       network_offset = skb_network_offset(skb);
+       transport_offset = skb_transport_offset(skb);
+
+       /* Network header needs to point to the outer IPv4 header inside ICMP */
+       skb_reset_network_header(skb);
+
+       /* Transport header needs to point to the UDP header */
+       skb_set_transport_header(skb, iph->ihl << 2);
+
+       up = udp_sk(sk);
+       lookup = READ_ONCE(up->encap_err_lookup);
+       if (!lookup || lookup(sk, skb))
+               sk = NULL; /* handler missing, or it returned non-zero */
+
+       skb_set_transport_header(skb, transport_offset); /* undo the temporary offsets */
+       skb_set_network_header(skb, network_offset);
+
+       return sk;
+}
+
  /*
   * This routine is called by the ICMP module when it gets some
   * sort of error condition.  If err < 0 then the socket should
@@ -601,6 +657,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
         struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2));
         const int type = icmp_hdr(skb)->type;
         const int code = icmp_hdr(skb)->code;
+       bool tunnel = false;
         struct sock *sk;
         int harderr;
         int err;
@@ -610,8 +667,15 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
                                iph->saddr, uh->source, skb->dev->ifindex,
                                inet_sdif(skb), udptable, NULL);
         if (!sk) {
-               __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
-               return; /* No socket for error */
+               /* No socket for error: try tunnels before discarding */
+               if (static_branch_unlikely(&udp_encap_needed_key))
+                       sk = __udp4_lib_err_encap(net, iph, uh, udptable, skb);
+
+               if (!sk) {
+                       __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
+                       return;
+               }
+               tunnel = true;
         }
  
         err = 0;
@@ -654,6 +718,10 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable)
          *      RFC1122: OK.  Passes ICMP errors back to application, as per
          *      4.1.3.3.
          */
+       if (tunnel) {
+               /* ...not for tunnels though: we don't have a sending socket */
+               goto out;
+       }
         if (!inet->recverr) {
                 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
                         goto out;
@@ -1891,13 +1959,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
         return 0;
  }
  
-DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
-void udp_encap_enable(void)
-{
-       static_branch_enable(&udp_encap_needed_key);
-}
-EXPORT_SYMBOL(udp_encap_enable);
-
  /* returns:
   *  -1: error
   *   0: success
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c

index 6539ff15e9a3420db867bc2174aab3e196f97e43..d0c412fc56adcdff4c0df10d5e1546e6ba1368eb 100644 (file)
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -68,6 +68,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
  
         udp_sk(sk)->encap_type = cfg->encap_type;
         udp_sk(sk)->encap_rcv = cfg->encap_rcv;
+       udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup;
         udp_sk(sk)->encap_destroy = cfg->encap_destroy;
         udp_sk(sk)->gro_receive = cfg->gro_receive;
         udp_sk(sk)->gro_complete = cfg->gro_complete;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c

index c55698d19d6847f0dab33d3b222a8e02fc81df95..1216c920f945999d3d346c01cbaaefec3a1abfc9 100644 (file)
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -462,6 +462,61 @@ csum_copy_err:
         goto try_again;
  }
  
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); /* tested by __udp6_lib_err() before it tries the tunnel lookup */
+void udpv6_encap_enable(void) /* switches udpv6_encap_needed_key on */
+{
+       static_branch_enable(&udpv6_encap_needed_key);
+}
+EXPORT_SYMBOL(udpv6_encap_enable);
+
+/* Try to match ICMP errors to UDP tunnels by looking up a socket without
+ * reversing source and destination port: this will match tunnels that force the
+ * same destination port on both endpoints (e.g. VXLAN, GENEVE). Note that
+ * lwtunnels might actually break this assumption by being configured with
+ * different destination ports on endpoints, in this case we won't be able to
+ * trace ICMP messages back to them.
+ *
+ * Then ask the tunnel implementation to match the error against a valid
+ * association.
+ *
+ * @net:      network namespace handed to the socket lookup
+ * @hdr:      IPv6 header supplying the addresses for the lookup
+ * @offset:   offset used for the temporary transport header; the caller
+ *            derives @uh from skb->data + offset
+ * @uh:       UDP header supplying the ports for the lookup
+ * @udptable: UDP socket table to search
+ * @skb:      buffer carrying the ICMPv6 error; passed to the socket lookup and
+ *            to the tunnel's encap_err_lookup handler
+ *
+ * The ports are passed as (uh->source, uh->dest), the opposite order from the
+ * lookup in __udp6_lib_err(), which passes (uh->dest, uh->source).
+ *
+ * While the handler runs, the skb network header is reset and the transport
+ * header is set to @offset. Both offsets are saved beforehand and restored
+ * before returning, on the match and the no-match path alike.
+ *
+ * NOTE(review): the lookup passes a literal 0 in the argument slot where
+ * __udp6_lib_err() passes inet6_sdif(skb) - confirm this is intended.
+ *
+ * Return the socket if we have a match. NULL is returned when the lookup finds
+ * no socket, when the socket has no encap_err_lookup handler, or when that
+ * handler returns non-zero.
+ */
+static struct sock *__udp6_lib_err_encap(struct net *net,
+                                        const struct ipv6hdr *hdr, int offset,
+                                        struct udphdr *uh,
+                                        struct udp_table *udptable,
+                                        struct sk_buff *skb)
+{
+       int (*lookup)(struct sock *sk, struct sk_buff *skb);
+       int network_offset, transport_offset;
+       struct udp_sock *up;
+       struct sock *sk;
+
+       sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source,
+                              &hdr->saddr, uh->dest,
+                              inet6_iif(skb), 0, udptable, skb);
+       if (!sk)
+               return NULL;
+
+       network_offset = skb_network_offset(skb);
+       transport_offset = skb_transport_offset(skb);
+
+       /* Network header needs to point to the outer IPv6 header inside ICMP */
+       skb_reset_network_header(skb);
+
+       /* Transport header needs to point to the UDP header */
+       skb_set_transport_header(skb, offset);
+
+       up = udp_sk(sk);
+       lookup = READ_ONCE(up->encap_err_lookup);
+       if (!lookup || lookup(sk, skb))
+               sk = NULL; /* handler missing, or it returned non-zero */
+
+       skb_set_transport_header(skb, transport_offset); /* undo the temporary offsets */
+       skb_set_network_header(skb, network_offset);
+       return sk;
+}
+
  void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                     u8 type, u8 code, int offset, __be32 info,
                     struct udp_table *udptable)
@@ -471,6 +526,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         const struct in6_addr *saddr = &hdr->saddr;
         const struct in6_addr *daddr = &hdr->daddr;
         struct udphdr *uh = (struct udphdr *)(skb->data+offset);
+       bool tunnel = false;
         struct sock *sk;
         int harderr;
         int err;
@@ -479,9 +535,18 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
         sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
                                inet6_iif(skb), inet6_sdif(skb), udptable, skb);
         if (!sk) {
-               __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
-                                 ICMP6_MIB_INERRORS);
-               return;
+               /* No socket for error: try tunnels before discarding */
+               if (static_branch_unlikely(&udpv6_encap_needed_key)) {
+                       sk = __udp6_lib_err_encap(net, hdr, offset, uh,
+                                                 udptable, skb);
+               }
+
+               if (!sk) {
+                       __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
+                                         ICMP6_MIB_INERRORS);
+                       return;
+               }
+               tunnel = true;
         }
  
         harderr = icmpv6_err_convert(type, code, &err);
@@ -495,10 +560,19 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
                         harderr = 1;
         }
         if (type == NDISC_REDIRECT) {
-               ip6_sk_redirect(skb, sk);
+               if (tunnel) {
+                       ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
+                                    sk->sk_mark, sk->sk_uid);
+               } else {
+                       ip6_sk_redirect(skb, sk);
+               }
                 goto out;
         }
  
+       /* Tunnels don't have an application socket: don't pass errors back */
+       if (tunnel)
+               goto out;
+
         if (!np->recverr) {
                 if (!harderr || sk->sk_state != TCP_ESTABLISHED)
                         goto out;
@@ -547,13 +621,6 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
         __udp6_lib_err(skb, opt, type, code, offset, info, &udp_table);
  }
  
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
-void udpv6_encap_enable(void)
-{
-       static_branch_enable(&udpv6_encap_needed_key);
-}
-EXPORT_SYMBOL(udpv6_encap_enable);
-
  static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
  {
         struct udp_sock *up = udp_sk(sk);
author	Stefano Brivio <sbrivio@redhat.com>
	Thu, 8 Nov 2018 11:19:14 +0000 (12:19 +0100)
committer	David S. Miller <davem@davemloft.net>
	Fri, 9 Nov 2018 01:13:08 +0000 (17:13 -0800)
include/linux/udp.h		patch \| blob \| history
include/net/udp_tunnel.h		patch \| blob \| history
net/ipv4/udp.c		patch \| blob \| history
net/ipv4/udp_tunnel.c		patch \| blob \| history
net/ipv6/udp.c		patch \| blob \| history