Hosts under DoS attack can suffer from false sharing
in enqueue_to_backlog(): atomic_inc(&sd->dropped).
This is because sd->dropped can be touched from many cpus,
possibly residing on different NUMA nodes.
Generalize the sk_drop_counters infrastructure
added in commit
c51613fa276f ("net: add sk->sk_drop_counters")
and use it to replace softnet_data.dropped
with the NUMA-friendly softnet_data.drop_counters.
This adds 64 bytes per cpu, maybe more in the future
if we increase the number of counters (currently 2)
per 'struct numa_drop_counters'.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250909121942.1202585-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
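[Not part of the patch] A minimal userspace sketch of the split-counter idea the patch applies: instead of one atomic_t that every CPU hammers, keep one cache-line-aligned slot per NUMA node parity and sum the slots on read. Here alignas(64) and getcpu() stand in for the kernel's ____cacheline_aligned_in_smp and numa_node_id(); the *_demo names are hypothetical and only illustrate the technique.

#define _GNU_SOURCE
#include <sched.h>        /* getcpu() (glibc >= 2.29) */
#include <stdatomic.h>
#include <stdalign.h>
#include <stdio.h>

/* Two cache-line-aligned slots, indexed by (numa node % 2), as in the patch. */
struct numa_drop_counters_demo {
	alignas(64) atomic_int drops0;  /* slot used from even NUMA nodes */
	alignas(64) atomic_int drops1;  /* slot used from odd NUMA nodes */
};

static void demo_drop_add(struct numa_drop_counters_demo *ndc, int val)
{
	unsigned int cpu = 0, node = 0;

	/* getcpu() reports the caller's NUMA node; fall back to slot 0 on error. */
	if (getcpu(&cpu, &node) != 0)
		node = 0;

	if (node % 2)
		atomic_fetch_add(&ndc->drops1, val);
	else
		atomic_fetch_add(&ndc->drops0, val);
}

static int demo_drop_read(struct numa_drop_counters_demo *ndc)
{
	/* Readers pay the cost of touching both lines; writers stay node-local. */
	return atomic_load(&ndc->drops0) + atomic_load(&ndc->drops1);
}

int main(void)
{
	struct numa_drop_counters_demo ndc = { 0 };

	demo_drop_add(&ndc, 1);
	printf("drops: %d\n", demo_drop_read(&ndc));
	return 0;
}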
__u32 offset; /* checksum offset */
struct icmp6_filter filter;
__u32 ip6mr_table;
- struct socket_drop_counters drop_counters;
+ struct numa_drop_counters drop_counters;
struct ipv6_pinfo inet6;
};
return dev->header_ops && dev->header_ops->create;
}
+struct numa_drop_counters {
+ atomic_t drops0 ____cacheline_aligned_in_smp;
+ atomic_t drops1 ____cacheline_aligned_in_smp;
+};
+
+static inline int numa_drop_read(const struct numa_drop_counters *ndc)
+{
+ return atomic_read(&ndc->drops0) + atomic_read(&ndc->drops1);
+}
+
+static inline void numa_drop_add(struct numa_drop_counters *ndc, int val)
+{
+ int n = numa_node_id() % 2;
+
+ if (n)
+ atomic_add(val, &ndc->drops1);
+ else
+ atomic_add(val, &ndc->drops0);
+}
+
+static inline void numa_drop_reset(struct numa_drop_counters *ndc)
+{
+ atomic_set(&ndc->drops0, 0);
+ atomic_set(&ndc->drops1, 0);
+}
+
/*
* Incoming packets are placed on per-CPU queues
*/
struct sk_buff_head input_pkt_queue;
struct napi_struct backlog;
- atomic_t dropped ____cacheline_aligned_in_smp;
+ struct numa_drop_counters drop_counters;
/* Another possibly contended cache line */
spinlock_t defer_lock ____cacheline_aligned_in_smp;
* the last UDP socket cacheline.
*/
struct hlist_node tunnel_list;
- struct socket_drop_counters drop_counters;
+ struct numa_drop_counters drop_counters;
};
#define udp_test_bit(nr, sk) \
struct inet_sock inet;
struct icmp_filter filter;
u32 ipmr_table;
- struct socket_drop_counters drop_counters;
+ struct numa_drop_counters drop_counters;
};
#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)
typedef __u32 __bitwise __portpair;
typedef __u64 __bitwise __addrpair;
-struct socket_drop_counters {
- atomic_t drops0 ____cacheline_aligned_in_smp;
- atomic_t drops1 ____cacheline_aligned_in_smp;
-};
-
/**
* struct sock_common - minimal network layer representation of sockets
* @skc_daddr: Foreign IPv4 addr
* @sk_err_soft: errors that don't cause failure but are the cause of a
* persistent failure not just 'timed out'
* @sk_drops: raw/udp drops counter
- * @sk_drop_counters: optional pointer to socket_drop_counters
+ * @sk_drop_counters: optional pointer to numa_drop_counters
* @sk_ack_backlog: current listen backlog
* @sk_max_ack_backlog: listen backlog set in listen()
* @sk_uid: user id of owner
#ifdef CONFIG_XFRM
struct xfrm_policy __rcu *sk_policy[2];
#endif
- struct socket_drop_counters *sk_drop_counters;
+ struct numa_drop_counters *sk_drop_counters;
__cacheline_group_end(sock_read_rxtx);
__cacheline_group_begin(sock_write_rxtx);
static inline void sk_drops_add(struct sock *sk, int segs)
{
- struct socket_drop_counters *sdc = sk->sk_drop_counters;
+ struct numa_drop_counters *ndc = sk->sk_drop_counters;
- if (sdc) {
- int n = numa_node_id() % 2;
-
- if (n)
- atomic_add(segs, &sdc->drops1);
- else
- atomic_add(segs, &sdc->drops0);
- } else {
+ if (ndc)
+ numa_drop_add(ndc, segs);
+ else
atomic_add(segs, &sk->sk_drops);
- }
}
static inline void sk_drops_inc(struct sock *sk)
static inline int sk_drops_read(const struct sock *sk)
{
- const struct socket_drop_counters *sdc = sk->sk_drop_counters;
+ const struct numa_drop_counters *ndc = sk->sk_drop_counters;
- if (sdc) {
+ if (ndc) {
DEBUG_NET_WARN_ON_ONCE(atomic_read(&sk->sk_drops));
- return atomic_read(&sdc->drops0) + atomic_read(&sdc->drops1);
+ return numa_drop_read(ndc);
}
return atomic_read(&sk->sk_drops);
}
static inline void sk_drops_reset(struct sock *sk)
{
- struct socket_drop_counters *sdc = sk->sk_drop_counters;
+ struct numa_drop_counters *ndc = sk->sk_drop_counters;
- if (sdc) {
- atomic_set(&sdc->drops0, 0);
- atomic_set(&sdc->drops1, 0);
- }
+ if (ndc)
+ numa_drop_reset(ndc);
atomic_set(&sk->sk_drops, 0);
}
backlog_unlock_irq_restore(sd, &flags);
cpu_backlog_drop:
- atomic_inc(&sd->dropped);
+ numa_drop_add(&sd->drop_counters, 1);
bad_dev:
dev_core_stats_rx_dropped_inc(skb->dev);
kfree_skb_reason(skb, reason);
seq_printf(seq,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x\n",
- READ_ONCE(sd->processed), atomic_read(&sd->dropped),
+ READ_ONCE(sd->processed),
+ numa_drop_read(&sd->drop_counters),
READ_ONCE(sd->time_squeeze), 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
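[Not part of the patch] The per-CPU totals stay visible in /proc/net/softnet_stat, where the dropped counter is the second hex field of each line (one line per CPU). A small userspace reader sketch, assuming that layout, that sums the column:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/net/softnet_stat", "r");
	unsigned long long total = 0;
	unsigned int processed, dropped;
	char line[512];

	if (!f)
		return 1;
	/* Each row: processed dropped time_squeeze ... as zero-padded hex. */
	while (fgets(line, sizeof(line), f)) {
		if (sscanf(line, "%x %x", &processed, &dropped) == 2)
			total += dropped;
	}
	fclose(f);
	printf("backlog drops across all CPUs: %llu\n", total);
	return 0;
}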