net: use NUMA drop counters for softnet_data.dropped
author    Eric Dumazet <edumazet@google.com>
          Tue, 9 Sep 2025 12:19:42 +0000 (12:19 +0000)
committer Jakub Kicinski <kuba@kernel.org>
          Sun, 14 Sep 2025 18:35:17 +0000 (11:35 -0700)
Hosts under DoS attack can suffer from false sharing
in enqueue_to_backlog(): atomic_inc(&sd->dropped).

This is because sd->dropped can be touched from many cpus,
possibly residing on different NUMA nodes.

Generalize the sk_drop_counters infrastructure
added in commit c51613fa276f ("net: add sk->sk_drop_counters")
and use it to replace softnet_data.dropped
with NUMA friendly softnet_data.drop_counters.

This adds 64 bytes per cpu, maybe more in the future
if we increase the number of counters (currently 2)
per 'struct numa_drop_counters'.
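
A minimal userspace sketch of the same split-counter pattern (not part of
this patch; getcpu() and the 64-byte alignment are assumptions standing in
for numa_node_id() and ____cacheline_aligned_in_smp):

#define _GNU_SOURCE
#include <sched.h>          /* getcpu() */
#include <stdatomic.h>
#include <stdio.h>

struct split_drop_counters {
	_Alignas(64) atomic_int drops0;   /* assumed 64-byte cache lines */
	_Alignas(64) atomic_int drops1;
};

static void split_drop_add(struct split_drop_counters *c, int val)
{
	unsigned int cpu, node = 0;

	(void)getcpu(&cpu, &node);        /* stand-in for numa_node_id() */

	/* Node parity picks the counter, so odd and even nodes
	 * never bounce the same cache line.
	 */
	if (node % 2)
		atomic_fetch_add(&c->drops1, val);
	else
		atomic_fetch_add(&c->drops0, val);
}

static int split_drop_read(struct split_drop_counters *c)
{
	/* Readers pay two loads and sum both halves. */
	return atomic_load(&c->drops0) + atomic_load(&c->drops1);
}

int main(void)
{
	static struct split_drop_counters ndc;

	split_drop_add(&ndc, 1);
	printf("drops: %d\n", split_drop_read(&ndc));
	return 0;
}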

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/ipv6.h
include/linux/netdevice.h
include/linux/udp.h
include/net/raw.h
include/net/sock.h
net/core/dev.c
net/core/net-procfs.c

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 261d02efb615cfb7fa5717a88c1b2612ef0cbd82..f43314517396777105cc20ba30cac9c651b7dbf9 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -295,7 +295,7 @@ struct raw6_sock {
        __u32                   offset;         /* checksum offset  */
        struct icmp6_filter     filter;
        __u32                   ip6mr_table;
-       struct socket_drop_counters drop_counters;
+       struct numa_drop_counters drop_counters;
        struct ipv6_pinfo       inet6;
 };
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f3a3b761abfb1b883a970b04634c1ef3e7ee5407..f5a840c07cf10eac05f5317baf46c771a043bd2c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3459,6 +3459,32 @@ static inline bool dev_has_header(const struct net_device *dev)
        return dev->header_ops && dev->header_ops->create;
 }
 
+struct numa_drop_counters {
+       atomic_t        drops0 ____cacheline_aligned_in_smp;
+       atomic_t        drops1 ____cacheline_aligned_in_smp;
+};
+
+static inline int numa_drop_read(const struct numa_drop_counters *ndc)
+{
+       return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1);
+}
+
+static inline void numa_drop_add(struct numa_drop_counters *ndc, int val)
+{
+       int n = numa_node_id() % 2;
+
+       if (n)
+               atomic_add(val, &ndc->drops1);
+       else
+               atomic_add(val, &ndc->drops0);
+}
+
+static inline void numa_drop_reset(struct numa_drop_counters *ndc)
+{
+       atomic_set(&ndc->drops0, 0);
+       atomic_set(&ndc->drops1, 0);
+}
+
 /*
  * Incoming packets are placed on per-CPU queues
  */
@@ -3504,7 +3530,7 @@ struct softnet_data {
        struct sk_buff_head     input_pkt_queue;
        struct napi_struct      backlog;
 
-       atomic_t                dropped ____cacheline_aligned_in_smp;
+       struct numa_drop_counters drop_counters;
 
        /* Another possibly contended cache line */
        spinlock_t              defer_lock ____cacheline_aligned_in_smp;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 981506be1e15ad3aa831c1d4884372b2a477f988..6ed008ab166557e868c1918daaaa5d551b7989a7 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -108,7 +108,7 @@ struct udp_sock {
         * the last UDP socket cacheline.
         */
        struct hlist_node       tunnel_list;
-       struct socket_drop_counters drop_counters;
+       struct numa_drop_counters drop_counters;
 };
 
 #define udp_test_bit(nr, sk)                   \
diff --git a/include/net/raw.h b/include/net/raw.h
index d5270913906077f88cbd843ed1edde345b4d42d7..66c0ffeada2eb10711e7ca7a7f05e6e36817e070 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -81,7 +81,7 @@ struct raw_sock {
        struct inet_sock   inet;
        struct icmp_filter filter;
        u32                ipmr_table;
-       struct socket_drop_counters drop_counters;
+       struct numa_drop_counters drop_counters;
 };
 
 #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
diff --git a/include/net/sock.h b/include/net/sock.h
index 896bec2d2176638460345c6fac5614f63df215d7..0fd465935334160eeda7c1ea608f5d6161f02cb1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -102,11 +102,6 @@ struct net;
 typedef __u32 __bitwise __portpair;
 typedef __u64 __bitwise __addrpair;
 
-struct socket_drop_counters {
-       atomic_t        drops0 ____cacheline_aligned_in_smp;
-       atomic_t        drops1 ____cacheline_aligned_in_smp;
-};
-
 /**
  *     struct sock_common - minimal network layer representation of sockets
  *     @skc_daddr: Foreign IPv4 addr
@@ -287,7 +282,7 @@ struct sk_filter;
   *    @sk_err_soft: errors that don't cause failure but are the cause of a
   *                  persistent failure not just 'timed out'
   *    @sk_drops: raw/udp drops counter
-  *    @sk_drop_counters: optional pointer to socket_drop_counters
+  *    @sk_drop_counters: optional pointer to numa_drop_counters
   *    @sk_ack_backlog: current listen backlog
   *    @sk_max_ack_backlog: listen backlog set in listen()
   *    @sk_uid: user id of owner
@@ -456,7 +451,7 @@ struct sock {
 #ifdef CONFIG_XFRM
        struct xfrm_policy __rcu *sk_policy[2];
 #endif
-       struct socket_drop_counters *sk_drop_counters;
+       struct numa_drop_counters *sk_drop_counters;
        __cacheline_group_end(sock_read_rxtx);
 
        __cacheline_group_begin(sock_write_rxtx);
@@ -2698,18 +2693,12 @@ struct sock_skb_cb {
 
 static inline void sk_drops_add(struct sock *sk, int segs)
 {
-       struct socket_drop_counters *sdc = sk->sk_drop_counters;
+       struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-       if (sdc) {
-               int n = numa_node_id() % 2;
-
-               if (n)
-                       atomic_add(segs, &sdc->drops1);
-               else
-                       atomic_add(segs, &sdc->drops0);
-       } else {
+       if (ndc)
+               numa_drop_add(ndc, segs);
+       else
                atomic_add(segs, &sk->sk_drops);
-       }
 }
 
 static inline void sk_drops_inc(struct sock *sk)
@@ -2719,23 +2708,21 @@ static inline void sk_drops_inc(struct sock *sk)
 
 static inline int sk_drops_read(const struct sock *sk)
 {
-       const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+       const struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-       if (sdc) {
+       if (ndc) {
                DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
-               return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+               return numa_drop_read(ndc);
        }
        return atomic_read(&sk->sk_drops);
 }
 
 static inline void sk_drops_reset(struct sock *sk)
 {
-       struct socket_drop_counters *sdc = sk->sk_drop_counters;
+       struct numa_drop_counters *ndc = sk->sk_drop_counters;
 
-       if (sdc) {
-               atomic_set(&sdc->drops0, 0);
-               atomic_set(&sdc->drops1, 0);
-       }
+       if (ndc)
+               numa_drop_reset(ndc);
        atomic_set(&sk->sk_drops, 0);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 1d1650d9ecff4a863b3449bf88d7201d72ec8e33..2522d9d8f0e4d0da3488afe98f46501eeecd5b51 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5248,7 +5248,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
        backlog_unlock_irq_restore(sd, &flags);
 
 cpu_backlog_drop:
-       atomic_inc(&sd->dropped);
+       numa_drop_add(&sd->drop_counters, 1);
 bad_dev:
        dev_core_stats_rx_dropped_inc(skb->dev);
        kfree_skb_reason(skb, reason);
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 4f0f0709a1cbc702dc44debc6325d20008b08d86..70e0e9a3b650c0753f0b865642aa372a956a4bf5 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -145,7 +145,8 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
        seq_printf(seq,
                   "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
                   "%08x %08x\n",
-                  READ_ONCE(sd->processed), atomic_read(&sd->dropped),
+                  READ_ONCE(sd->processed),
+                  numa_drop_read(&sd->drop_counters),
                   READ_ONCE(sd->time_squeeze), 0,
                   0, 0, 0, 0, /* was fastroute */
                   0,   /* was cpu_collision */
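
The aggregated count still surfaces as the second hexadecimal column of
/proc/net/softnet_stat (one line per online CPU). A small reader, purely
illustrative and not part of the patch:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/softnet_stat", "r");
	char line[512];
	int cpu = 0;

	if (!f) {
		perror("fopen /proc/net/softnet_stat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		unsigned int processed, dropped;

		/* softnet_seq_show() prints processed then the drop count, in %08x */
		if (sscanf(line, "%x %x", &processed, &dropped) == 2)
			printf("cpu %d: dropped %u\n", cpu, dropped);
		cpu++;
	}
	fclose(f);
	return 0;
}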