From d51705614f668254cc5def7490df76f9680b4659 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:35 -0700 Subject: [PATCH 01/16] mctp: Handle error of rtnl_register_module(). Since introduced, mctp has been ignoring the returned value of rtnl_register_module(), which could fail silently. Handling the error allows users to view a module as an all-or-nothing thing in terms of the rtnetlink functionality. This prevents syzkaller from reporting spurious errors from its tests, where OOM often occurs and module is automatically loaded. Let's handle the errors by rtnl_register_many(). Fixes: 583be982d934 ("mctp: Add device handling and netlink interface") Fixes: 831119f88781 ("mctp: Add neighbour netlink interface") Fixes: 06d2f4c583a7 ("mctp: Add netlink route management") Signed-off-by: Kuniyuki Iwashima Reviewed-by: Jeremy Kerr Signed-off-by: Paolo Abeni --- include/net/mctp.h | 2 +- net/mctp/af_mctp.c | 6 +++++- net/mctp/device.c | 30 ++++++++++++++++++------------ net/mctp/neigh.c | 31 +++++++++++++++++++------------ net/mctp/route.c | 33 +++++++++++++++++++++++---------- 5 files changed, 66 insertions(+), 36 deletions(-) diff --git a/include/net/mctp.h b/include/net/mctp.h index 7b17c52e8ce2..28d59ae94ca3 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -295,7 +295,7 @@ void mctp_neigh_remove_dev(struct mctp_dev *mdev); int mctp_routes_init(void); void mctp_routes_exit(void); -void mctp_device_init(void); +int mctp_device_init(void); void mctp_device_exit(void); #endif /* __NET_MCTP_H */ diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 43288b408fde..f6de136008f6 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -756,10 +756,14 @@ static __init int mctp_init(void) if (rc) goto err_unreg_routes; - mctp_device_init(); + rc = mctp_device_init(); + if (rc) + goto err_unreg_neigh; return 0; +err_unreg_neigh: + mctp_neigh_exit(); err_unreg_routes: mctp_routes_exit(); err_unreg_proto: diff --git a/net/mctp/device.c b/net/mctp/device.c index acb97b257428..85cc5f31f1e7 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -524,25 +524,31 @@ static struct notifier_block mctp_dev_nb = { .priority = ADDRCONF_NOTIFY_PRIORITY, }; -void __init mctp_device_init(void) +static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0}, +}; + +int __init mctp_device_init(void) { - register_netdevice_notifier(&mctp_dev_nb); + int err; - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETADDR, - NULL, mctp_dump_addrinfo, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWADDR, - mctp_rtm_newaddr, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELADDR, - mctp_rtm_deladdr, NULL, 0); + register_netdevice_notifier(&mctp_dev_nb); rtnl_af_register(&mctp_af_ops); + + err = rtnl_register_many(mctp_device_rtnl_msg_handlers); + if (err) { + rtnl_af_unregister(&mctp_af_ops); + unregister_netdevice_notifier(&mctp_dev_nb); + } + + return err; } void __exit mctp_device_exit(void) { + rtnl_unregister_many(mctp_device_rtnl_msg_handlers); rtnl_af_unregister(&mctp_af_ops); - rtnl_unregister(PF_MCTP, RTM_DELADDR); - rtnl_unregister(PF_MCTP, RTM_NEWADDR); - rtnl_unregister(PF_MCTP, RTM_GETADDR); - unregister_netdevice_notifier(&mctp_dev_nb); } diff --git a/net/mctp/neigh.c b/net/mctp/neigh.c index ffa0f9e0983f..590f642413e4 100644 --- a/net/mctp/neigh.c +++ b/net/mctp/neigh.c @@ -322,22 +322,29 @@ static struct pernet_operations mctp_net_ops = { .exit = mctp_neigh_net_exit, }; +static const struct rtnl_msg_handler mctp_neigh_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, mctp_rtm_newneigh, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELNEIGH, mctp_rtm_delneigh, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETNEIGH, NULL, mctp_rtm_getneigh, 0}, +}; + int __init mctp_neigh_init(void) { - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWNEIGH, - mctp_rtm_newneigh, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELNEIGH, - mctp_rtm_delneigh, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETNEIGH, - NULL, mctp_rtm_getneigh, 0); - - return register_pernet_subsys(&mctp_net_ops); + int err; + + err = register_pernet_subsys(&mctp_net_ops); + if (err) + return err; + + err = rtnl_register_many(mctp_neigh_rtnl_msg_handlers); + if (err) + unregister_pernet_subsys(&mctp_net_ops); + + return err; } -void __exit mctp_neigh_exit(void) +void mctp_neigh_exit(void) { + rtnl_unregister_many(mctp_neigh_rtnl_msg_handlers); unregister_pernet_subsys(&mctp_net_ops); - rtnl_unregister(PF_MCTP, RTM_GETNEIGH); - rtnl_unregister(PF_MCTP, RTM_DELNEIGH); - rtnl_unregister(PF_MCTP, RTM_NEWNEIGH); } diff --git a/net/mctp/route.c b/net/mctp/route.c index eefd7834d9a0..597e9cf5aa64 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1474,26 +1474,39 @@ static struct pernet_operations mctp_net_ops = { .exit = mctp_routes_net_exit, }; +static const struct rtnl_msg_handler mctp_route_rtnl_msg_handlers[] = { + {THIS_MODULE, PF_MCTP, RTM_NEWROUTE, mctp_newroute, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_DELROUTE, mctp_delroute, NULL, 0}, + {THIS_MODULE, PF_MCTP, RTM_GETROUTE, NULL, mctp_dump_rtinfo, 0}, +}; + int __init mctp_routes_init(void) { + int err; + dev_add_pack(&mctp_packet_type); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, - NULL, mctp_dump_rtinfo, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, - mctp_newroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, - mctp_delroute, NULL, 0); + err = register_pernet_subsys(&mctp_net_ops); + if (err) + goto err_pernet; + + err = rtnl_register_many(mctp_route_rtnl_msg_handlers); + if (err) + goto err_rtnl; - return register_pernet_subsys(&mctp_net_ops); + return 0; + +err_rtnl: + unregister_pernet_subsys(&mctp_net_ops); +err_pernet: + dev_remove_pack(&mctp_packet_type); + return err; } void mctp_routes_exit(void) { + rtnl_unregister_many(mctp_route_rtnl_msg_handlers); unregister_pernet_subsys(&mctp_net_ops); - rtnl_unregister(PF_MCTP, RTM_DELROUTE); - rtnl_unregister(PF_MCTP, RTM_NEWROUTE); - rtnl_unregister(PF_MCTP, RTM_GETROUTE); dev_remove_pack(&mctp_packet_type); } -- 2.51.0 From 5be2062e3080e3ff6707816caa445ec0c6eaacf7 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:36 -0700 Subject: [PATCH 02/16] mpls: Handle error of rtnl_register_module(). Since introduced, mpls_init() has been ignoring the returned value of rtnl_register_module(), which could fail silently. Handling the error allows users to view a module as an all-or-nothing thing in terms of the rtnetlink functionality. This prevents syzkaller from reporting spurious errors from its tests, where OOM often occurs and module is automatically loaded. Let's handle the errors by rtnl_register_many(). Fixes: 03c0566542f4 ("mpls: Netlink commands to add, remove, and dump routes") Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/mpls/af_mpls.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index aba983531ed3..df62638b6498 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -2728,6 +2728,15 @@ static struct rtnl_af_ops mpls_af_ops __read_mostly = { .get_stats_af_size = mpls_get_stats_af_size, }; +static const struct rtnl_msg_handler mpls_rtnl_msg_handlers[] __initdata_or_module = { + {THIS_MODULE, PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0}, + {THIS_MODULE, PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0}, + {THIS_MODULE, PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, 0}, + {THIS_MODULE, PF_MPLS, RTM_GETNETCONF, + mpls_netconf_get_devconf, mpls_netconf_dump_devconf, + RTNL_FLAG_DUMP_UNLOCKED}, +}; + static int __init mpls_init(void) { int err; @@ -2746,24 +2755,25 @@ static int __init mpls_init(void) rtnl_af_register(&mpls_af_ops); - rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_NEWROUTE, - mpls_rtm_newroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_DELROUTE, - mpls_rtm_delroute, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETROUTE, - mpls_getroute, mpls_dump_routes, 0); - rtnl_register_module(THIS_MODULE, PF_MPLS, RTM_GETNETCONF, - mpls_netconf_get_devconf, - mpls_netconf_dump_devconf, - RTNL_FLAG_DUMP_UNLOCKED); - err = ipgre_tunnel_encap_add_mpls_ops(); + err = rtnl_register_many(mpls_rtnl_msg_handlers); if (err) + goto out_unregister_rtnl_af; + + err = ipgre_tunnel_encap_add_mpls_ops(); + if (err) { pr_err("Can't add mpls over gre tunnel ops\n"); + goto out_unregister_rtnl; + } err = 0; out: return err; +out_unregister_rtnl: + rtnl_unregister_many(mpls_rtnl_msg_handlers); +out_unregister_rtnl_af: + rtnl_af_unregister(&mpls_af_ops); + dev_remove_pack(&mpls_packet_type); out_unregister_pernet: unregister_pernet_subsys(&mpls_net_ops); goto out; -- 2.51.0 From b5e837c86041bef60f36cf9f20a641a30764379a Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 8 Oct 2024 11:47:37 -0700 Subject: [PATCH 03/16] phonet: Handle error of rtnl_register_module(). MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Before commit addf9b90de22 ("net: rtnetlink: use rcu to free rtnl message handlers"), once the first rtnl_register_module() allocated rtnl_msg_handlers[PF_PHONET], the following calls never failed. However, after the commit, rtnl_register_module() could fail silently to allocate rtnl_msg_handlers[PF_PHONET][msgtype] and requires error handling for each call. Handling the error allows users to view a module as an all-or-nothing thing in terms of the rtnetlink functionality. This prevents syzkaller from reporting spurious errors from its tests, where OOM often occurs and module is automatically loaded. Let's use rtnl_register_many() to handle the errors easily. Fixes: addf9b90de22 ("net: rtnetlink: use rcu to free rtnl message handlers") Signed-off-by: Kuniyuki Iwashima Acked-by: Rémi Denis-Courmont Signed-off-by: Paolo Abeni --- net/phonet/pn_netlink.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7008d402499d..894e5c72d6bf 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -285,23 +285,17 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return err; } +static const struct rtnl_msg_handler phonet_rtnl_msg_handlers[] __initdata_or_module = { + {THIS_MODULE, PF_PHONET, RTM_NEWADDR, addr_doit, NULL, 0}, + {THIS_MODULE, PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0}, + {THIS_MODULE, PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0}, + {THIS_MODULE, PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0}, + {THIS_MODULE, PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0}, + {THIS_MODULE, PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, + RTNL_FLAG_DUMP_UNLOCKED}, +}; + int __init phonet_netlink_register(void) { - int err = rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWADDR, - addr_doit, NULL, 0); - if (err) - return err; - - /* Further rtnl_register_module() cannot fail */ - rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELADDR, - addr_doit, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETADDR, - NULL, getaddr_dumpit, 0); - rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_NEWROUTE, - route_doit, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_DELROUTE, - route_doit, NULL, 0); - rtnl_register_module(THIS_MODULE, PF_PHONET, RTM_GETROUTE, - NULL, route_dumpit, RTNL_FLAG_DUMP_UNLOCKED); - return 0; + return rtnl_register_many(phonet_rtnl_msg_handlers); } -- 2.51.0 From aeb218d900e3ea2cc3878ba92cb4758227075358 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 9 Oct 2024 10:12:19 +0100 Subject: [PATCH 04/16] docs: netdev: document guidance on cleanup patches The purpose of this section is to document what is the current practice regarding clean-up patches which address checkpatch warnings and similar problems. I feel there is a value in having this documented so others can easily refer to it. Clearly this topic is subjective. And to some extent the current practice discourages a wider range of patches than is described here. But I feel it is best to start somewhere, with the most well established part of the current practice. Signed-off-by: Simon Horman Link: https://patch.msgid.link/20241009-doc-mc-clean-v2-1-e637b665fa81@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/process/maintainer-netdev.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst index c9edf9e7362d..1ae71e31591c 100644 --- a/Documentation/process/maintainer-netdev.rst +++ b/Documentation/process/maintainer-netdev.rst @@ -355,6 +355,8 @@ just do it. As a result, a sequence of smaller series gets merged quicker and with better review coverage. Re-posting large series also increases the mailing list traffic. +.. _rcs: + Local variable ordering ("reverse xmas tree", "RCS") ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -391,6 +393,21 @@ APIs and helpers, especially scoped iterators. However, direct use of ``__free()`` within networking core and drivers is discouraged. Similar guidance applies to declaring variables mid-function. +Clean-up patches +~~~~~~~~~~~~~~~~ + +Netdev discourages patches which perform simple clean-ups, which are not in +the context of other work. For example: + +* Addressing ``checkpatch.pl`` warnings +* Addressing :ref:`Local variable ordering` issues +* Conversions to device-managed APIs (``devm_`` helpers) + +This is because it is felt that the churn that such changes produce comes +at a greater cost than the value of such clean-ups. + +Conversely, spelling and grammar fixes are not discouraged. + Resending after review ~~~~~~~~~~~~~~~~~~~~~~ -- 2.51.0 From 40dddd4b8bd08a69471efd96107a4e1c73fabefc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 18:58:02 +0000 Subject: [PATCH 05/16] ppp: fix ppp_async_encode() illegal access syzbot reported an issue in ppp_async_encode() [1] In this case, pppoe_sendmsg() is called with a zero size. Then ppp_async_encode() is called with an empty skb. BUG: KMSAN: uninit-value in ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline] BUG: KMSAN: uninit-value in ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675 ppp_async_encode drivers/net/ppp/ppp_async.c:545 [inline] ppp_async_push+0xb4f/0x2660 drivers/net/ppp/ppp_async.c:675 ppp_async_send+0x130/0x1b0 drivers/net/ppp/ppp_async.c:634 ppp_channel_bridge_input drivers/net/ppp/ppp_generic.c:2280 [inline] ppp_input+0x1f1/0xe60 drivers/net/ppp/ppp_generic.c:2304 pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379 sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113 __release_sock+0x1da/0x330 net/core/sock.c:3072 release_sock+0x6b/0x250 net/core/sock.c:3626 pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:744 ____sys_sendmsg+0x903/0xb60 net/socket.c:2602 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656 __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742 __do_sys_sendmmsg net/socket.c:2771 [inline] __se_sys_sendmmsg net/socket.c:2768 [inline] __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768 x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:4092 [inline] slab_alloc_node mm/slub.c:4135 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4187 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678 alloc_skb include/linux/skbuff.h:1322 [inline] sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732 pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:744 ____sys_sendmsg+0x903/0xb60 net/socket.c:2602 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656 __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742 __do_sys_sendmmsg net/socket.c:2771 [inline] __se_sys_sendmmsg net/socket.c:2768 [inline] __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768 x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f CPU: 1 UID: 0 PID: 5411 Comm: syz.1.14 Not tainted 6.12.0-rc1-syzkaller-00165-g360c1f1f24c6 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+1d121645899e7692f92a@syzkaller.appspotmail.com Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241009185802.3763282-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ppp/ppp_async.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index a940b9a67107..c97406c6004d 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -542,7 +542,7 @@ ppp_async_encode(struct asyncppp *ap) * and 7 (code-reject) must be sent as though no options * had been negotiated. */ - islcp = proto == PPP_LCP && 1 <= data[2] && data[2] <= 7; + islcp = proto == PPP_LCP && count >= 3 && 1 <= data[2] && data[2] <= 7; if (i == 0) { if (islcp) -- 2.51.0 From 6fd27ea183c208e478129a85e11d880fc70040f2 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Wed, 9 Oct 2024 14:55:16 +0800 Subject: [PATCH 06/16] net/smc: fix lacks of icsk_syn_mss with IPPROTO_SMC Eric report a panic on IPPROTO_SMC, and give the facts that when INET_PROTOSW_ICSK was set, icsk->icsk_sync_mss must be set too. Bug: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Mem abort info: ESR = 0x0000000086000005 EC = 0x21: IABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 FSC = 0x05: level 1 translation fault user pgtable: 4k pages, 48-bit VAs, pgdp=00000001195d1000 [0000000000000000] pgd=0800000109c46003, p4d=0800000109c46003, pud=0000000000000000 Internal error: Oops: 0000000086000005 [#1] PREEMPT SMP Modules linked in: CPU: 1 UID: 0 PID: 8037 Comm: syz.3.265 Not tainted 6.11.0-rc7-syzkaller-g5f5673607153 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : 0x0 lr : cipso_v4_sock_setattr+0x2a8/0x3c0 net/ipv4/cipso_ipv4.c:1910 sp : ffff80009b887a90 x29: ffff80009b887aa0 x28: ffff80008db94050 x27: 0000000000000000 x26: 1fffe0001aa6f5b3 x25: dfff800000000000 x24: ffff0000db75da00 x23: 0000000000000000 x22: ffff0000d8b78518 x21: 0000000000000000 x20: ffff0000d537ad80 x19: ffff0000d8b78000 x18: 1fffe000366d79ee x17: ffff8000800614a8 x16: ffff800080569b84 x15: 0000000000000001 x14: 000000008b336894 x13: 00000000cd96feaa x12: 0000000000000003 x11: 0000000000040000 x10: 00000000000020a3 x9 : 1fffe0001b16f0f1 x8 : 0000000000000000 x7 : 0000000000000000 x6 : 000000000000003f x5 : 0000000000000040 x4 : 0000000000000001 x3 : 0000000000000000 x2 : 0000000000000002 x1 : 0000000000000000 x0 : ffff0000d8b78000 Call trace: 0x0 netlbl_sock_setattr+0x2e4/0x338 net/netlabel/netlabel_kapi.c:1000 smack_netlbl_add+0xa4/0x154 security/smack/smack_lsm.c:2593 smack_socket_post_create+0xa8/0x14c security/smack/smack_lsm.c:2973 security_socket_post_create+0x94/0xd4 security/security.c:4425 __sock_create+0x4c8/0x884 net/socket.c:1587 sock_create net/socket.c:1622 [inline] __sys_socket_create net/socket.c:1659 [inline] __sys_socket+0x134/0x340 net/socket.c:1706 __do_sys_socket net/socket.c:1720 [inline] __se_sys_socket net/socket.c:1718 [inline] __arm64_sys_socket+0x7c/0x94 net/socket.c:1718 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:712 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:730 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:598 Code: ???????? ???????? ???????? ???????? (????????) ---[ end trace 0000000000000000 ]--- This patch add a toy implementation that performs a simple return to prevent such panic. This is because MSS can be set in sock_create_kern or smc_setsockopt, similar to how it's done in AF_SMC. However, for AF_SMC, there is currently no way to synchronize MSS within __sys_connect_file. This toy implementation lays the groundwork for us to support such feature for IPPROTO_SMC in the future. Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Reported-by: Eric Dumazet Signed-off-by: D. Wythe Reviewed-by: Eric Dumazet Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/1728456916-67035-1-git-send-email-alibuda@linux.alibaba.com Signed-off-by: Jakub Kicinski --- net/smc/smc_inet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c index a5b2041600f9..a944e7dcb8b9 100644 --- a/net/smc/smc_inet.c +++ b/net/smc/smc_inet.c @@ -108,12 +108,23 @@ static struct inet_protosw smc_inet6_protosw = { }; #endif /* CONFIG_IPV6 */ +static unsigned int smc_sync_mss(struct sock *sk, u32 pmtu) +{ + /* No need pass it through to clcsock, mss can always be set by + * sock_create_kern or smc_setsockopt. + */ + return 0; +} + static int smc_inet_init_sock(struct sock *sk) { struct net *net = sock_net(sk); /* init common smc sock */ smc_sk_init(net, sk, IPPROTO_SMC); + + inet_csk(sk)->icsk_sync_mss = smc_sync_mss; + /* create clcsock */ return smc_create_clcsk(net, sk, sk->sk_family); } -- 2.51.0 From 7d3fce8cbe3a70a1c7c06c9b53696be5d5d8dd5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 9 Oct 2024 09:11:32 +0000 Subject: [PATCH 07/16] slip: make slhc_remember() more robust against malicious packets syzbot found that slhc_remember() was missing checks against malicious packets [1]. slhc_remember() only checked the size of the packet was at least 20, which is not good enough. We need to make sure the packet includes the IPv4 and TCP header that are supposed to be carried. Add iph and th pointers to make the code more readable. [1] BUG: KMSAN: uninit-value in slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666 slhc_remember+0x2e8/0x7b0 drivers/net/slip/slhc.c:666 ppp_receive_nonmp_frame+0xe45/0x35e0 drivers/net/ppp/ppp_generic.c:2455 ppp_receive_frame drivers/net/ppp/ppp_generic.c:2372 [inline] ppp_do_recv+0x65f/0x40d0 drivers/net/ppp/ppp_generic.c:2212 ppp_input+0x7dc/0xe60 drivers/net/ppp/ppp_generic.c:2327 pppoe_rcv_core+0x1d3/0x720 drivers/net/ppp/pppoe.c:379 sk_backlog_rcv+0x13b/0x420 include/net/sock.h:1113 __release_sock+0x1da/0x330 net/core/sock.c:3072 release_sock+0x6b/0x250 net/core/sock.c:3626 pppoe_sendmsg+0x2b8/0xb90 drivers/net/ppp/pppoe.c:903 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:744 ____sys_sendmsg+0x903/0xb60 net/socket.c:2602 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656 __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742 __do_sys_sendmmsg net/socket.c:2771 [inline] __se_sys_sendmmsg net/socket.c:2768 [inline] __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768 x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Uninit was created at: slab_post_alloc_hook mm/slub.c:4091 [inline] slab_alloc_node mm/slub.c:4134 [inline] kmem_cache_alloc_node_noprof+0x6bf/0xb80 mm/slub.c:4186 kmalloc_reserve+0x13d/0x4a0 net/core/skbuff.c:587 __alloc_skb+0x363/0x7b0 net/core/skbuff.c:678 alloc_skb include/linux/skbuff.h:1322 [inline] sock_wmalloc+0xfe/0x1a0 net/core/sock.c:2732 pppoe_sendmsg+0x3a7/0xb90 drivers/net/ppp/pppoe.c:867 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x30f/0x380 net/socket.c:744 ____sys_sendmsg+0x903/0xb60 net/socket.c:2602 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2656 __sys_sendmmsg+0x3c1/0x960 net/socket.c:2742 __do_sys_sendmmsg net/socket.c:2771 [inline] __se_sys_sendmmsg net/socket.c:2768 [inline] __x64_sys_sendmmsg+0xbc/0x120 net/socket.c:2768 x64_sys_call+0xb6e/0x3ba0 arch/x86/include/generated/asm/syscalls_64.h:308 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f CPU: 0 UID: 0 PID: 5460 Comm: syz.2.33 Not tainted 6.12.0-rc2-syzkaller-00006-g87d6aab2389e #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Fixes: b5451d783ade ("slip: Move the SLIP drivers") Reported-by: syzbot+2ada1bc857496353be5a@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/670646db.050a0220.3f80e.0027.GAE@google.com/T/#u Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241009091132.2136321-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/slip/slhc.c | 57 ++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index 252cd757d3a2..ee9fd3a94b96 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -643,46 +643,57 @@ bad: int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { - struct cstate *cs; - unsigned ihl; - + const struct tcphdr *th; unsigned char index; + struct iphdr *iph; + struct cstate *cs; + unsigned int ihl; - if(isize < 20) { - /* The packet is shorter than a legal IP header */ + /* The packet is shorter than a legal IP header. + * Also make sure isize is positive. + */ + if (isize < (int)sizeof(struct iphdr)) { +runt: comp->sls_i_runt++; - return slhc_toss( comp ); + return slhc_toss(comp); } + iph = (struct iphdr *)icp; /* Peek at the IP header's IHL field to find its length */ - ihl = icp[0] & 0xf; - if(ihl < 20 / 4){ - /* The IP header length field is too small */ - comp->sls_i_runt++; - return slhc_toss( comp ); - } - index = icp[9]; - icp[9] = IPPROTO_TCP; + ihl = iph->ihl; + /* The IP header length field is too small, + * or packet is shorter than the IP header followed + * by minimal tcp header. + */ + if (ihl < 5 || isize < ihl * 4 + sizeof(struct tcphdr)) + goto runt; + + index = iph->protocol; + iph->protocol = IPPROTO_TCP; if (ip_fast_csum(icp, ihl)) { /* Bad IP header checksum; discard */ comp->sls_i_badcheck++; - return slhc_toss( comp ); + return slhc_toss(comp); } - if(index > comp->rslot_limit) { + if (index > comp->rslot_limit) { comp->sls_i_error++; return slhc_toss(comp); } - + th = (struct tcphdr *)(icp + ihl * 4); + if (th->doff < sizeof(struct tcphdr) / 4) + goto runt; + if (isize < ihl * 4 + th->doff * 4) + goto runt; /* Update local state */ cs = &comp->rstate[comp->recv_current = index]; comp->flags &=~ SLF_TOSS; - memcpy(&cs->cs_ip,icp,20); - memcpy(&cs->cs_tcp,icp + ihl*4,20); + memcpy(&cs->cs_ip, iph, sizeof(*iph)); + memcpy(&cs->cs_tcp, th, sizeof(*th)); if (ihl > 5) - memcpy(cs->cs_ipopt, icp + sizeof(struct iphdr), (ihl - 5) * 4); - if (cs->cs_tcp.doff > 5) - memcpy(cs->cs_tcpopt, icp + ihl*4 + sizeof(struct tcphdr), (cs->cs_tcp.doff - 5) * 4); - cs->cs_hsize = ihl*2 + cs->cs_tcp.doff*2; + memcpy(cs->cs_ipopt, &iph[1], (ihl - 5) * 4); + if (th->doff > 5) + memcpy(cs->cs_tcpopt, &th[1], (th->doff - 5) * 4); + cs->cs_hsize = ihl*2 + th->doff*2; cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated -- 2.51.0 From 9937aae39bc09645cd67d53e0320926cd91570de Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 9 Oct 2024 09:47:22 +0100 Subject: [PATCH 08/16] MAINTAINERS: consistently exclude wireless files from NETWORKING [GENERAL] We already exclude wireless drivers from the netdev@ traffic, to delegate it to linux-wireless@, and avoid overwhelming netdev@. Many of the following wireless-related sections MAINTAINERS are already not included in the NETWORKING [GENERAL] section. For consistency, exclude those that are. * 802.11 (including CFG80211/NL80211) * MAC80211 * RFKILL Acked-by: Johannes Berg Signed-off-by: Simon Horman Link: https://patch.msgid.link/20241009-maint-net-hdrs-v2-1-f2c86e7309c8@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58380aeafbf0..89789bddfb6a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16197,8 +16197,19 @@ F: lib/random32.c F: net/ F: tools/net/ F: tools/testing/selftests/net/ +X: Documentation/networking/mac80211-injection.rst +X: Documentation/networking/mac80211_hwsim/ +X: Documentation/networking/regulatory.rst +X: include/net/cfg80211.h +X: include/net/ieee80211_radiotap.h +X: include/net/iw_handler.h +X: include/net/mac80211.h +X: include/net/wext.h X: net/9p/ X: net/bluetooth/ +X: net/mac80211/ +X: net/rfkill/ +X: net/wireless/ NETWORKING [IPSEC] M: Steffen Klassert -- 2.51.0 From 5404b5a2fea9831a1f5be4ab9a94de07d976b177 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 9 Oct 2024 09:47:23 +0100 Subject: [PATCH 09/16] MAINTAINERS: Add headers and mailing list to UDP section Add netdev mailing list and some more udp.h headers to the UDP section. This is now more consistent with the TCP section. Acked-by: Willem de Bruijn Signed-off-by: Simon Horman Link: https://patch.msgid.link/20241009-maint-net-hdrs-v2-2-f2c86e7309c8@kernel.org Signed-off-by: Jakub Kicinski --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 89789bddfb6a..baf2ba9dc070 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24184,8 +24184,12 @@ F: drivers/usb/host/xhci* USER DATAGRAM PROTOCOL (UDP) M: Willem de Bruijn +L: netdev@vger.kernel.org S: Maintained F: include/linux/udp.h +F: include/net/udp.h +F: include/trace/events/udp.h +F: include/uapi/linux/udp.h F: net/ipv4/udp.c F: net/ipv6/udp.c -- 2.51.0 From f7345ccc62a4b880cf76458db5f320725f28e400 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Oct 2024 18:36:09 +0200 Subject: [PATCH 10/16] rcu/nocb: Fix rcuog wake-up from offline softirq After a CPU has set itself offline and before it eventually calls rcutree_report_cpu_dead(), there are still opportunities for callbacks to be enqueued, for example from a softirq. When that happens on NOCB, the rcuog wake-up is deferred through an IPI to an online CPU in order not to call into the scheduler and risk arming the RT-bandwidth after hrtimers have been migrated out and disabled. But performing a synchronized IPI from a softirq is buggy as reported in the following scenario: WARNING: CPU: 1 PID: 26 at kernel/smp.c:633 smp_call_function_single Modules linked in: rcutorture torture CPU: 1 UID: 0 PID: 26 Comm: migration/1 Not tainted 6.11.0-rc1-00012-g9139f93209d1 #1 Stopper: multi_cpu_stop+0x0/0x320 <- __stop_cpus+0xd0/0x120 RIP: 0010:smp_call_function_single swake_up_one_online __call_rcu_nocb_wake __call_rcu_common ? rcu_torture_one_read call_timer_fn __run_timers run_timer_softirq handle_softirqs irq_exit_rcu ? tick_handle_periodic sysvec_apic_timer_interrupt Fix this with forcing deferred rcuog wake up through the NOCB timer when the CPU is offline. The actual wake up will happen from rcutree_report_cpu_dead(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409231644.4c55582d-lkp@intel.com Fixes: 9139f93209d1 ("rcu/nocb: Fix RT throttling hrtimer armed from offline CPU") Reviewed-by: "Joel Fernandes (Google)" Signed-off-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- kernel/rcu/tree_nocb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 97b99cd06923..16865475120b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -554,13 +554,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); - } else if (!irqs_disabled_flags(flags)) { + } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) { /* ... if queue was empty ... */ rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { + /* + * Don't do the wake-up upfront on fragile paths. + * Also offline CPUs can't call swake_up_one_online() from + * (soft-)IRQs. Rely on the final deferred wake-up from + * rcutree_report_cpu_dead() + */ rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); -- 2.51.0 From 6e0391e48cf9fb8b1b5e27c0cbbaf2e4639f2c33 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 9 Oct 2024 13:41:31 -0700 Subject: [PATCH 11/16] of: Skip kunit tests when arm64+ACPI doesn't populate root node A root node is required to apply DT overlays. A root node is usually present after commit 7b937cc243e5 ("of: Create of_root if no dtb provided by firmware"), except for on arm64 systems booted with ACPI tables. In that case, the root node is intentionally not populated because it would "allow DT devices to be instantiated atop an ACPI base system"[1]. Introduce an OF function that skips the kunit test if the root node isn't populated. Limit the test to when both CONFIG_ARM64 and CONFIG_ACPI are set, because otherwise the lack of a root node is a bug. Make the function private and take a kunit test parameter so that it can't be abused to test for the presence of the root node in non-test code. Use this function to skip tests that require the root node. Currently that's the DT tests and any tests that apply overlays. Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/6cd337fb-38f0-41cb-b942-5844b84433db@roeck-us.net Link: https://lore.kernel.org/r/Zd4dQpHO7em1ji67@FVFF77S0Q05N.cambridge.arm.com [1] Fixes: 893ecc6d2d61 ("of: Add KUnit test to confirm DTB is loaded") Signed-off-by: Stephen Boyd Tested-by: Guenter Roeck Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20241009204133.1169931-1-sboyd@kernel.org Signed-off-by: Rob Herring (Arm) --- drivers/of/of_kunit_helpers.c | 15 +++++++++++++++ drivers/of/of_private.h | 3 +++ drivers/of/of_test.c | 3 +++ drivers/of/overlay_test.c | 3 +++ 4 files changed, 24 insertions(+) diff --git a/drivers/of/of_kunit_helpers.c b/drivers/of/of_kunit_helpers.c index 287d6c91bb37..7b3ed5a382aa 100644 --- a/drivers/of/of_kunit_helpers.c +++ b/drivers/of/of_kunit_helpers.c @@ -10,6 +10,19 @@ #include #include +#include "of_private.h" + +/** + * of_root_kunit_skip() - Skip test if the root node isn't populated + * @test: test to skip if the root node isn't populated + */ +void of_root_kunit_skip(struct kunit *test) +{ + if (IS_ENABLED(CONFIG_ARM64) && IS_ENABLED(CONFIG_ACPI) && !of_root) + kunit_skip(test, "arm64+acpi doesn't populate a root node"); +} +EXPORT_SYMBOL_GPL(of_root_kunit_skip); + #if defined(CONFIG_OF_OVERLAY) && defined(CONFIG_OF_EARLY_FLATTREE) static void of_overlay_fdt_apply_kunit_exit(void *ovcs_id) @@ -36,6 +49,8 @@ int of_overlay_fdt_apply_kunit(struct kunit *test, void *overlay_fdt, int ret; int *copy_id; + of_root_kunit_skip(test); + copy_id = kunit_kmalloc(test, sizeof(*copy_id), GFP_KERNEL); if (!copy_id) return -ENOMEM; diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index 04aa2a91f851..c235d6c909a1 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -42,6 +42,9 @@ extern raw_spinlock_t devtree_lock; extern struct list_head aliases_lookup; extern struct kset *of_kset; +struct kunit; +extern void of_root_kunit_skip(struct kunit *test); + #if defined(CONFIG_OF_DYNAMIC) extern int of_property_notify(int action, struct device_node *np, struct property *prop, struct property *old_prop); diff --git a/drivers/of/of_test.c b/drivers/of/of_test.c index c85a258bc6ae..b0557ded838f 100644 --- a/drivers/of/of_test.c +++ b/drivers/of/of_test.c @@ -7,6 +7,8 @@ #include +#include "of_private.h" + /* * Test that the root node "/" can be found by path. */ @@ -36,6 +38,7 @@ static struct kunit_case of_dtb_test_cases[] = { static int of_dtb_test_init(struct kunit *test) { + of_root_kunit_skip(test); if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE"); diff --git a/drivers/of/overlay_test.c b/drivers/of/overlay_test.c index 19695bdf77be..1f76d50fb16a 100644 --- a/drivers/of/overlay_test.c +++ b/drivers/of/overlay_test.c @@ -11,6 +11,8 @@ #include #include +#include "of_private.h" + static const char * const kunit_node_name = "kunit-test"; static const char * const kunit_compatible = "test,empty"; @@ -62,6 +64,7 @@ static void of_overlay_apply_kunit_cleanup(struct kunit *test) struct device *dev; struct device_node *np; + of_root_kunit_skip(test); if (!IS_ENABLED(CONFIG_OF_EARLY_FLATTREE)) kunit_skip(test, "requires CONFIG_OF_EARLY_FLATTREE for root node"); -- 2.51.0 From 8956c582ac6b1693a351230179f898979dd00bdf Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 5 Oct 2024 10:53:29 +0200 Subject: [PATCH 12/16] powerpc/8xx: Fix kernel DTLB miss on dcbz Following OOPS is encountered while loading test_bpf module on powerpc 8xx: [ 218.835567] BUG: Unable to handle kernel data access on write at 0xcb000000 [ 218.842473] Faulting instruction address: 0xc0017a80 [ 218.847451] Oops: Kernel access of bad area, sig: 11 [#1] [ 218.852854] BE PAGE_SIZE=16K PREEMPT CMPC885 [ 218.857207] SAF3000 DIE NOTIFICATION [ 218.860713] Modules linked in: test_bpf(+) test_module [ 218.865867] CPU: 0 UID: 0 PID: 527 Comm: insmod Not tainted 6.11.0-s3k-dev-09856-g3de3d71ae2e6-dirty #1280 [ 218.875546] Hardware name: MIAE 8xx 0x500000 CMPC885 [ 218.880521] NIP: c0017a80 LR: beab859c CTR: 000101d4 [ 218.885584] REGS: cac2bc90 TRAP: 0300 Not tainted (6.11.0-s3k-dev-09856-g3de3d71ae2e6-dirty) [ 218.894308] MSR: 00009032 CR: 55005555 XER: a0007100 [ 218.901290] DAR: cb000000 DSISR: c2000000 [ 218.901290] GPR00: 000185d1 cac2bd50 c21b9580 caf7c030 c3883fcc 00000008 cafffffc 00000000 [ 218.901290] GPR08: 00040000 18300000 20000000 00000004 99005555 100d815e ca669d08 00000369 [ 218.901290] GPR16: ca730000 00000000 ca2c004c 00000000 00000000 0000035d 00000311 00000369 [ 218.901290] GPR24: ca732240 00000001 00030ba3 c3800000 00000000 00185d48 caf7c000 ca2c004c [ 218.941087] NIP [c0017a80] memcpy+0x88/0xec [ 218.945277] LR [beab859c] test_bpf_init+0x22c/0x3c90 [test_bpf] [ 218.951476] Call Trace: [ 218.953916] [cac2bd50] [beab8570] test_bpf_init+0x200/0x3c90 [test_bpf] (unreliable) [ 218.962034] [cac2bde0] [c0004c04] do_one_initcall+0x4c/0x1fc [ 218.967706] [cac2be40] [c00a2ec4] do_init_module+0x68/0x360 [ 218.973292] [cac2be60] [c00a5194] init_module_from_file+0x8c/0xc0 [ 218.979401] [cac2bed0] [c00a5568] sys_finit_module+0x250/0x3f0 [ 218.985248] [cac2bf20] [c000e390] system_call_exception+0x8c/0x15c [ 218.991444] [cac2bf30] [c00120a8] ret_from_syscall+0x0/0x28 This happens in the main loop of memcpy() ==> c0017a80: 7c 0b 37 ec dcbz r11,r6 c0017a84: 80 e4 00 04 lwz r7,4(r4) c0017a88: 81 04 00 08 lwz r8,8(r4) c0017a8c: 81 24 00 0c lwz r9,12(r4) c0017a90: 85 44 00 10 lwzu r10,16(r4) c0017a94: 90 e6 00 04 stw r7,4(r6) c0017a98: 91 06 00 08 stw r8,8(r6) c0017a9c: 91 26 00 0c stw r9,12(r6) c0017aa0: 95 46 00 10 stwu r10,16(r6) c0017aa4: 42 00 ff dc bdnz c0017a80 Commit ac9f97ff8b32 ("powerpc/8xx: Inconditionally use task PGDIR in DTLB misses") relies on re-reading DAR register to know if an error is due to a missing copy of a PMD entry in task's PGDIR, allthough DAR was already read in the exception prolog and copied into thread struct. This is because is it done very early in the exception and there are not enough registers available to keep a pointer to thread struct. However, dcbz instruction is buggy and doesn't update DAR register on fault. That is detected and generates a call to FixupDAR workaround which updates DAR copy in thread struct but doesn't fix DAR register. Let's fix DAR in addition to the update of DAR copy in thread struct. Fixes: ac9f97ff8b32 ("powerpc/8xx: Inconditionally use task PGDIR in DTLB misses") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2b851399bd87e81c6ccb87ea3a7a6b32c7aa04d7.1728118396.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/head_8xx.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 811a7130505c..56c5ebe21b99 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -494,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) -- 2.51.0 From a0cc649353bb726d4aa0db60dce467432197b746 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Oct 2024 21:28:01 -0400 Subject: [PATCH 13/16] selftests/rseq: Fix mm_cid test failure Adapt the rseq.c/rseq.h code to follow GNU C library changes introduced by: glibc commit 2e456ccf0c34 ("Linux: Make __rseq_size useful for feature detection (bug 31965)") Without this fix, rseq selftests for mm_cid fail: ./run_param_test.sh Default parameters Running test spinlock Running compare-twice test spinlock Running mm_cid test spinlock Error: cpu id getter unavailable Fixes: 18c2355838e7 ("selftests/rseq: Implement rseq mm_cid field support") Signed-off-by: Mathieu Desnoyers Cc: Peter Zijlstra CC: Boqun Feng CC: "Paul E. McKenney" Cc: Shuah Khan CC: Carlos O'Donell CC: Florian Weimer CC: linux-kselftest@vger.kernel.org CC: stable@vger.kernel.org Signed-off-by: Shuah Khan --- tools/testing/selftests/rseq/rseq.c | 110 +++++++++++++++++++--------- tools/testing/selftests/rseq/rseq.h | 10 +-- 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; - rseq_feature_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -209,7 +250,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d7364ea4d201..4e217b620e0c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -68,12 +68,6 @@ extern unsigned int rseq_size; /* Flags used during rseq registration. */ extern unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -extern unsigned int rseq_feature_size; - enum rseq_mo { RSEQ_MO_RELAXED = 0, RSEQ_MO_CONSUME = 1, /* Unused */ @@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(void) static inline bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); } /* @@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node_id(void) static inline bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); } static inline uint32_t rseq_current_mm_cid(void) -- 2.51.0 From 4ee5ca9a29384fcf3f18232fdf8474166dea8dca Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 10 Oct 2024 16:52:35 -0400 Subject: [PATCH 14/16] ftrace/selftest: Test combination of function_graph tracer and function profiler Masami reported a bug when running function graph tracing then the function profiler. The following commands would cause a kernel crash: # cd /sys/kernel/tracing/ # echo function_graph > current_tracer # echo 1 > function_profile_enabled In that order. Create a test to test this two to make sure this does not come back as a regression. Link: https://lore.kernel.org/172398528350.293426.8347220120333730248.stgit@devnote2 Link: https://lore.kernel.org/all/20241010165235.35122877@gandalf.local.home/ Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) Signed-off-by: Shuah Khan --- .../ftrace/test.d/ftrace/fgraph-profiler.tc | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc new file mode 100644 index 000000000000..ffff8646733c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiler with function graph tracing +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer + +# The function graph tracer can now be run along side of the function +# profiler. But there was a bug that caused the combination of the two +# to crash. It also required the function graph tracer to be started +# first. +# +# This test triggers that bug +# +# We need both function_graph and profiling to run this test + +fail() { # mesg + echo $1 + exit_fail +} + +echo "Enabling function graph tracer:" +echo function_graph > current_tracer +echo "enable profiler" + +# Older kernels do not allow function_profile to be enabled with +# function graph tracer. If the below fails, mark it as unsupported +echo 1 > function_profile_enabled || exit_unsupported + +# Let it run for a bit to make sure nothing explodes +sleep 1 + +exit 0 -- 2.51.0 From 8e929cb546ee42c9a61d24fae60605e9e3192354 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 13 Oct 2024 14:33:32 -0700 Subject: [PATCH 15/16] Linux 6.12-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c5493c0c0ca1..8cf3cf528892 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From cdda1f26e74bac732eca537a69f19f6a37b641be Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Thu, 10 Oct 2024 16:52:32 +0100 Subject: [PATCH 16/16] pidfd: add ioctl to retrieve pid info A common pattern when using pid fds is having to get information about the process, which currently requires /proc being mounted, resolving the fd to a pid, and then do manual string parsing of /proc/N/status and friends. This needs to be reimplemented over and over in all userspace projects (e.g.: I have reimplemented resolving in systemd, dbus, dbus-daemon, polkit so far), and requires additional care in checking that the fd is still valid after having parsed the data, to avoid races. Having a programmatic API that can be used directly removes all these requirements, including having /proc mounted. As discussed at LPC24, add an ioctl with an extensible struct so that more parameters can be added later if needed. Start with returning pid/tgid/ppid and creds unconditionally, and cgroupid optionally. Signed-off-by: Luca Boccassi Link: https://lore.kernel.org/r/20241010155401.2268522-1-luca.boccassi@gmail.com Signed-off-by: Christian Brauner --- fs/pidfs.c | 86 ++++++++++++++++++- include/uapi/linux/pidfd.h | 50 +++++++++++ .../testing/selftests/pidfd/pidfd_open_test.c | 82 +++++++++++++++++- 3 files changed, 214 insertions(+), 4 deletions(-) diff --git a/fs/pidfs.c b/fs/pidfs.c index 80675b6bf884..618abb1fa1b8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,81 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) +{ + struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + size_t usize = _IOC_SIZE(cmd); + struct pidfd_info kinfo = {}; + struct user_namespace *user_ns; + const struct cred *c; + __u64 mask; +#ifdef CONFIG_CGROUPS + struct cgroup *cgrp; +#endif + + if (!uinfo) + return -EINVAL; + if (usize < PIDFD_INFO_SIZE_VER0) + return -EINVAL; /* First version, no smaller struct possible */ + + if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) + return -EFAULT; + + c = get_task_cred(task); + if (!c) + return -ESRCH; + + /* Unconditionally return identifiers and credentials, the rest only on request */ + + user_ns = current_user_ns(); + kinfo.ruid = from_kuid_munged(user_ns, c->uid); + kinfo.rgid = from_kgid_munged(user_ns, c->gid); + kinfo.euid = from_kuid_munged(user_ns, c->euid); + kinfo.egid = from_kgid_munged(user_ns, c->egid); + kinfo.suid = from_kuid_munged(user_ns, c->suid); + kinfo.sgid = from_kgid_munged(user_ns, c->sgid); + kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); + kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); + kinfo.mask |= PIDFD_INFO_CREDS; + put_cred(c); + +#ifdef CONFIG_CGROUPS + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + kinfo.cgroupid = cgroup_id(cgrp); + kinfo.mask |= PIDFD_INFO_CGROUPID; + rcu_read_unlock(); +#endif + + /* + * Copy pid/tgid last, to reduce the chances the information might be + * stale. Note that it is not possible to ensure it will be valid as the + * task might return as soon as the copy_to_user finishes, but that's ok + * and userspace expects that might happen and can act accordingly, so + * this is just best-effort. What we can do however is checking that all + * the fields are set correctly, or return ESRCH to avoid providing + * incomplete information. */ + + kinfo.ppid = task_ppid_nr_ns(task, NULL); + kinfo.tgid = task_tgid_vnr(task); + kinfo.pid = task_pid_vnr(task); + kinfo.mask |= PIDFD_INFO_PID; + + if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) + return -ESRCH; + + /* + * If userspace and the kernel have the same struct size it can just + * be copied. If userspace provides an older struct, only the bits that + * userspace knows about will be copied. If userspace provides a new + * struct, only the bits that the kernel knows about will be copied. + */ + if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) + return -EFAULT; + + return 0; +} + static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; @@ -122,13 +198,17 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct ns_common *ns_common = NULL; struct pid_namespace *pid_ns; - if (arg) - return -EINVAL; - task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; + /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ + if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) + return pidfd_info(task, cmd, arg); + + if (arg) + return -EINVAL; + scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 565fc0629fff..4540f6301b8c 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -16,6 +16,55 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +/* Flags for pidfd_info. */ +#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */ +#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */ +#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */ + +#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */ + +struct pidfd_info { + /* + * This mask is similar to the request_mask in statx(2). + * + * Userspace indicates what extensions or expensive-to-calculate fields + * they want by setting the corresponding bits in mask. The kernel + * will ignore bits that it does not know about. + * + * When filling the structure, the kernel will only set bits + * corresponding to the fields that were actually filled by the kernel. + * This also includes any future extensions that might be automatically + * filled. If the structure size is too small to contain a field + * (requested or not), to avoid confusion the mask will not + * contain a bit for that field. + * + * As such, userspace MUST verify that mask contains the + * corresponding flags after the ioctl(2) returns to ensure that it is + * using valid data. + */ + __u64 mask; + /* + * The information contained in the following fields might be stale at the + * time it is received, as the target process might have exited as soon as + * the IOCTL was processed, and there is no way to avoid that. However, it + * is guaranteed that if the call was successful, then the information was + * correct and referred to the intended process at the time the work was + * performed. */ + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -28,5 +77,6 @@ #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) #endif /* _UAPI_LINUX_PIDFD_H */ diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index c62564c264b1..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,6 +183,56 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: -- 2.51.0