inet_diag: avoid cache line misses in inet_diag_bc_sk()
author Eric Dumazet <edumazet@google.com>
Thu, 28 Aug 2025 10:27:38 +0000 (10:27 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 30 Aug 2025 02:29:24 +0000 (19:29 -0700)
inet_diag_bc_sk() pulls five cache lines per socket,
while most filters only need the first two.

Add three booleans to struct inet_diag_dump_data,
each set only when the filter actually references the
corresponding socket field:

- mark_needed       /* INET_DIAG_BC_MARK_COND present. */
- cgroup_needed     /* INET_DIAG_BC_CGROUP_COND present. */
- userlocks_needed  /* INET_DIAG_BC_AUTO present. */

This removes millions of cache line misses per ss invocation
when simple filters are used on busy servers.
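
For example (assuming current iproute2 filter keywords), a plain
port filter such as ss -t 'dport = :443' compiles to bytecode made
of address/port conditions only, so none of the three flags is set
and sk_userlocks, sk_mark and sk_cgrp_data are never read. Only
filters using fwmark, cgroup or autobound still pay for the extra
cache lines.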

offsetof(struct sock, sk_userlocks) = 0xf3
offsetof(struct sock, sk_mark) = 0x20c
offsetof(struct sock, sk_cgrp_data) = 0x298
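
As a quick sanity check on those offsets, here is a minimal
userspace sketch (assuming 64-byte cache lines, as on x86-64;
the exact offsets vary with kernel version and config) that maps
each field to its cache line index inside struct sock:

#include <stdio.h>

int main(void)
{
	/* struct sock field offsets quoted above. */
	static const struct { const char *name; unsigned int off; } f[] = {
		{ "sk_userlocks", 0xf3  },
		{ "sk_mark",      0x20c },
		{ "sk_cgrp_data", 0x298 },
	};

	for (int i = 0; i < 3; i++)
		printf("%-14s offset 0x%03x -> cache line %u\n",
		       f[i].name, f[i].off, f[i].off / 64);
	return 0;
}

This prints cache lines 3, 8 and 10; together with the first two
lines, which hold the address/port fields most filters test, that
accounts for the five lines mentioned above.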

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20250828102738.2065992-6-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
include/linux/inet_diag.h
net/ipv4/inet_diag.c

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 86a0641ec36e1bf25483a8e6c3412073b9893d36..704fd415c2b497dfba591a7ef46009dec7824d75 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -38,6 +38,11 @@ struct inet_diag_dump_data {
 #define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES]
 
        struct bpf_sk_storage_diag *bpf_stg_diag;
+       bool mark_needed;       /* INET_DIAG_BC_MARK_COND present. */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+       bool cgroup_needed;     /* INET_DIAG_BC_CGROUP_COND present. */
+#endif
+       bool userlocks_needed;  /* INET_DIAG_BC_AUTO present. */
 };
 
 struct inet_connection_sock;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 11710304268781581b3559aca770d50dc0090ef3..f0b6c5a411a2008e2a039ed37e262f3f132e58ac 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -605,18 +605,22 @@ int inet_diag_bc_sk(const struct inet_diag_dump_data *cb_data, struct sock *sk)
        entry.sport = READ_ONCE(inet->inet_num);
        entry.dport = ntohs(READ_ONCE(inet->inet_dport));
        entry.ifindex = READ_ONCE(sk->sk_bound_dev_if);
-       entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
-       if (sk_fullsock(sk))
-               entry.mark = READ_ONCE(sk->sk_mark);
-       else if (sk->sk_state == TCP_NEW_SYN_RECV)
-               entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
-       else if (sk->sk_state == TCP_TIME_WAIT)
-               entry.mark = inet_twsk(sk)->tw_mark;
-       else
-               entry.mark = 0;
+       if (cb_data->userlocks_needed)
+               entry.userlocks = sk_fullsock(sk) ? READ_ONCE(sk->sk_userlocks) : 0;
+       if (cb_data->mark_needed) {
+               if (sk_fullsock(sk))
+                       entry.mark = READ_ONCE(sk->sk_mark);
+               else if (sk->sk_state == TCP_NEW_SYN_RECV)
+                       entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+               else if (sk->sk_state == TCP_TIME_WAIT)
+                       entry.mark = inet_twsk(sk)->tw_mark;
+               else
+                       entry.mark = 0;
+       }
 #ifdef CONFIG_SOCK_CGROUP_DATA
-       entry.cgroup_id = sk_fullsock(sk) ?
-               cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
+       if (cb_data->cgroup_needed)
+               entry.cgroup_id = sk_fullsock(sk) ?
+                       cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
 #endif
 
        return inet_diag_bc_run(bc, &entry);
@@ -716,16 +720,21 @@ static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
 }
 #endif
 
-static int inet_diag_bc_audit(const struct nlattr *attr,
+static int inet_diag_bc_audit(struct inet_diag_dump_data *cb_data,
                              const struct sk_buff *skb)
 {
-       bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
+       const struct nlattr *attr = cb_data->inet_diag_nla_bc;
        const void *bytecode, *bc;
        int bytecode_len, len;
+       bool net_admin;
+
+       if (!attr)
+               return 0;
 
-       if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op))
+       if (nla_len(attr) < sizeof(struct inet_diag_bc_op))
                return -EINVAL;
 
+       net_admin = netlink_net_capable(skb, CAP_NET_ADMIN);
        bytecode = bc = nla_data(attr);
        len = bytecode_len = nla_len(attr);
 
@@ -757,14 +766,18 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
                                return -EPERM;
                        if (!valid_markcond(bc, len, &min_len))
                                return -EINVAL;
+                       cb_data->mark_needed = true;
                        break;
 #ifdef CONFIG_SOCK_CGROUP_DATA
                case INET_DIAG_BC_CGROUP_COND:
                        if (!valid_cgroupcond(bc, len, &min_len))
                                return -EINVAL;
+                       cb_data->cgroup_needed = true;
                        break;
 #endif
                case INET_DIAG_BC_AUTO:
+                       cb_data->userlocks_needed = true;
+                       fallthrough;
                case INET_DIAG_BC_JMP:
                case INET_DIAG_BC_NOP:
                        break;
@@ -841,13 +854,10 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen)
                kfree(cb_data);
                return err;
        }
-       nla = cb_data->inet_diag_nla_bc;
-       if (nla) {
-               err = inet_diag_bc_audit(nla, skb);
-               if (err) {
-                       kfree(cb_data);
-                       return err;
-               }
+       err = inet_diag_bc_audit(cb_data, skb);
+       if (err) {
+               kfree(cb_data);
+               return err;
        }
 
        nla = cb_data->inet_diag_nla_bpf_stgs;