www.infradead.org Git - users/dwmw2/linux.git/commitdiff
inetpeer: do not get a refcount in inet_getpeer()
author: Eric Dumazet <edumazet@google.com>
Sun, 15 Dec 2024 17:56:29 +0000 (17:56 +0000)
committer: Jakub Kicinski <kuba@kernel.org>
Wed, 18 Dec 2024 03:37:48 +0000 (19:37 -0800)
No inet_getpeer() caller other than ip4_frag_init() needs
to acquire a permanent refcount on the inetpeer.

They can switch to full RCU protection.

Move the refcount_inc_not_zero() into ip4_frag_init(),
so that all the other callers no longer have to
perform a pair of expensive atomic operations on
a possibly contended cache line.

inet_putpeer() no longer needs to be exported.

After this patch, my DUT can receive 8,400,000 UDP packets
per second targeting closed ports, using 50% fewer CPU cycles
than before.

Also replace two calls to l3mdev_master_ifindex() with
l3mdev_master_ifindex_rcu() (Ido's idea).

Fixes: 8c2bd38b95f7 ("icmp: change the order of rate limits")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20241215175629.1248773-5-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/icmp.c
net/ipv4/inetpeer.c
net/ipv4/ip_fragment.c
net/ipv4/route.c
net/ipv6/icmp.c
net/ipv6/ip6_output.c
net/ipv6/ndisc.c

index 5eeb9f569a706cf2766d74bcf1a667c8930804f2..094084b61bff8a17c4e85c99019b84e9cba21599 100644 (file)
@@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
        struct dst_entry *dst = &rt->dst;
        struct inet_peer *peer;
        bool rc = true;
-       int vif;
 
        if (!apply_ratelimit)
                return true;
@@ -321,12 +320,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
        if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))
                goto out;
 
-       vif = l3mdev_master_ifindex(dst->dev);
-       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
+       rcu_read_lock();
+       peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr,
+                              l3mdev_master_ifindex_rcu(dst->dev));
        rc = inet_peer_xrlim_allow(peer,
                                   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
-       if (peer)
-               inet_putpeer(peer);
+       rcu_read_unlock();
 out:
        if (!rc)
                __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
index 28c3ae5bc4a0b62030bd190dbe5284632ea23efd..e02484f4d22b8ea47cbaeed46c5fb0a7411462a1 100644 (file)
@@ -109,8 +109,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
                p = rb_entry(parent, struct inet_peer, rb_node);
                cmp = inetpeer_addr_cmp(daddr, &p->daddr);
                if (cmp == 0) {
-                       if (!refcount_inc_not_zero(&p->refcnt))
-                               break;
                        now = jiffies;
                        if (READ_ONCE(p->dtime) != now)
                                WRITE_ONCE(p->dtime, now);
@@ -169,6 +167,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
        }
 }
 
+/* Must be called under RCU : No refcount change is done here. */
 struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                               const struct inetpeer_addr *daddr)
 {
@@ -179,10 +178,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
        /* Attempt a lockless lookup first.
         * Because of a concurrent writer, we might not find an existing entry.
         */
-       rcu_read_lock();
        seq = read_seqbegin(&base->lock);
        p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
-       rcu_read_unlock();
 
        if (p)
                return p;
@@ -200,7 +197,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
                if (p) {
                        p->daddr = *daddr;
                        p->dtime = (__u32)jiffies;
-                       refcount_set(&p->refcnt, 2);
+                       refcount_set(&p->refcnt, 1);
                        atomic_set(&p->rid, 0);
                        p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
                        p->rate_tokens = 0;
@@ -228,7 +225,6 @@ void inet_putpeer(struct inet_peer *p)
        if (refcount_dec_and_test(&p->refcnt))
                kfree_rcu(p, rcu);
 }
-EXPORT_SYMBOL_GPL(inet_putpeer);
 
 /*
  *     Check transmit rate limitation for given message.
index 46e1171299f22ccf0b201eabbff5d3279a0703d8..7a435746a22dee9f11c0dc732a8b5a7724f4eea3 100644 (file)
@@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
 {
        struct ipq *qp = container_of(q, struct ipq, q);
-       struct net *net = q->fqdir->net;
-
        const struct frag_v4_compare_key *key = a;
+       struct net *net = q->fqdir->net;
+       struct inet_peer *p = NULL;
 
        q->key.v4 = *key;
        qp->ecn = 0;
-       qp->peer = q->fqdir->max_dist ?
-               inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) :
-               NULL;
+       if (q->fqdir->max_dist) {
+               rcu_read_lock();
+               p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
+               if (p && !refcount_inc_not_zero(&p->refcnt))
+                       p = NULL;
+               rcu_read_unlock();
+       }
+       qp->peer = p;
 }
 
 static void ip4_frag_free(struct inet_frag_queue *q)
index 297a9939c6e74beffc592dbdd7266281fe842440..9f9d4e6ea1b9287c0d758e9bdf543a92b14974ef 100644 (file)
@@ -870,11 +870,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
        }
        log_martians = IN_DEV_LOG_MARTIANS(in_dev);
        vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
-       rcu_read_unlock();
 
        net = dev_net(rt->dst.dev);
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
        if (!peer) {
+               rcu_read_unlock();
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
                          rt_nexthop(rt, ip_hdr(skb)->daddr));
                return;
@@ -893,7 +893,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
         */
        if (peer->n_redirects >= ip_rt_redirect_number) {
                peer->rate_last = jiffies;
-               goto out_put_peer;
+               goto out_unlock;
        }
 
        /* Check for load limit; set rate_last to the latest sent
@@ -914,8 +914,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
        }
-out_put_peer:
-       inet_putpeer(peer);
+out_unlock:
+       rcu_read_unlock();
 }
 
 static int ip_error(struct sk_buff *skb)
@@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
                break;
        }
 
+       rcu_read_lock();
        peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
-                              l3mdev_master_ifindex(skb->dev));
-
+                              l3mdev_master_ifindex_rcu(skb->dev));
        send = true;
        if (peer) {
                now = jiffies;
@@ -989,8 +989,9 @@ static int ip_error(struct sk_buff *skb)
                        peer->rate_tokens -= ip_rt_error_cost;
                else
                        send = false;
-               inet_putpeer(peer);
        }
+       rcu_read_unlock();
+
        if (send)
                icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
 
index 4593e3992c67b84e3a10f30be28762974094d21f..a6984a29fdb9dd972a11ca9f8d5e794c443bac6f 100644 (file)
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
                if (rt->rt6i_dst.plen < 128)
                        tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
 
+               rcu_read_lock();
                peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
                res = inet_peer_xrlim_allow(peer, tmo);
-               if (peer)
-                       inet_putpeer(peer);
+               rcu_read_unlock();
        }
        if (!res)
                __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
index cdcbb3b6c5da3abed0c94d0ade8118d6799a60bc..d577bf2f3053873d27b241029592cdbb0a124ad7 100644 (file)
@@ -613,6 +613,7 @@ int ip6_forward(struct sk_buff *skb)
                else
                        target = &hdr->daddr;
 
+               rcu_read_lock();
                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
 
                /* Limit redirects both by destination (here)
@@ -620,8 +621,7 @@ int ip6_forward(struct sk_buff *skb)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
-               if (peer)
-                       inet_putpeer(peer);
+               rcu_read_unlock();
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);
 
index f113554d13325453cd04ce4e5686d837943e96ff..d044c67019de6da1eb29dee875cf8cda30210ceb 100644 (file)
@@ -1731,10 +1731,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
                          "Redirect: destination is not a neighbour\n");
                goto release;
        }
+
+       rcu_read_lock();
        peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
        ret = inet_peer_xrlim_allow(peer, 1*HZ);
-       if (peer)
-               inet_putpeer(peer);
+       rcu_read_unlock();
+
        if (!ret)
                goto release;