From 702c290a1cb16f4a64567cae0bedb848399f7915 Mon Sep 17 00:00:00 2001 From: Gilad Naaman Date: Mon, 4 Nov 2024 08:35:44 +0000 Subject: [PATCH 01/16] sctp: Avoid enqueuing addr events redundantly Avoid modifying or enqueuing new events if it's possible to tell that no one will consume them. Since enqueueing requires searching the current queue for opposite events for the same address, adding addresses en-masse turns this inetaddr_event into a bottle-neck, as it will get slower and slower with each address added. Signed-off-by: Gilad Naaman Acked-by: Xin Long Link: https://patch.msgid.link/20241104083545.114-1-gnaaman@drivenets.com Signed-off-by: Paolo Abeni --- net/sctp/ipv6.c | 2 +- net/sctp/protocol.c | 16 +++++++++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f7b809c0d142..b96c849545ae 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -103,10 +103,10 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr) && addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { - sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 39ca5403d4d7..8b9a1b96695e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -738,6 +738,20 @@ void sctp_addr_wq_mgmt(struct net *net, struct sctp_sockaddr_entry *addr, int cm */ spin_lock_bh(&net->sctp.addr_wq_lock); + + /* Avoid searching the queue or modifying it if there are no consumers, + * as it can lead to performance degradation if addresses are modified + * en-masse. + * + * If the queue already contains some events, update it anyway to avoid + * ugly races between new sessions and new address events. + */ + if (list_empty(&net->sctp.auto_asconf_splist) && + list_empty(&net->sctp.addr_waitq)) { + spin_unlock_bh(&net->sctp.addr_wq_lock); + return; + } + /* Offsets existing events in addr_wq */ addrw = sctp_addr_wq_lookup(net, addr); if (addrw) { @@ -808,10 +822,10 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, if (addr->a.sa.sa_family == AF_INET && addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; list_del_rcu(&addr->list); + sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); break; } } -- 2.51.0 From 84b9749a3a704dcc824a88aa8267247c801d51e4 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Wed, 6 Nov 2024 10:12:28 +0800 Subject: [PATCH 02/16] proc/softirqs: replace seq_printf with seq_put_decimal_ull_width seq_printf is costy, on a system with n CPUs, reading /proc/softirqs would yield 10*n decimal values, and the extra cost parsing format string grows linearly with number of cpus. Replace seq_printf with seq_put_decimal_ull_width have significant performance improvement. On an 8CPUs system, reading /proc/softirqs show ~40% performance gain with this patch. Signed-off-by: David Wang <00107082@163.com> Signed-off-by: Linus Torvalds --- fs/proc/softirqs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index f4616083faef..04bb29721419 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c @@ -20,7 +20,7 @@ static int show_softirqs(struct seq_file *p, void *v) for (i = 0; i < NR_SOFTIRQS; i++) { seq_printf(p, "%12s:", softirq_to_name[i]); for_each_possible_cpu(j) - seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); + seq_put_decimal_ull_width(p, " ", kstat_softirqs_cpu(i, j), 10); seq_putc(p, '\n'); } return 0; -- 2.51.0 From de88df01796b309903b70888fbdf2b89607e3a6a Mon Sep 17 00:00:00 2001 From: Wenjia Zhang Date: Wed, 6 Nov 2024 09:26:12 +0100 Subject: [PATCH 03/16] net/smc: Fix lookup of netdev by using ib_device_get_netdev() The SMC-R variant of the SMC protocol used direct call to function ib_device_ops.get_netdev() to lookup netdev. As we used mlx5 device driver to run SMC-R, it failed to find a device, because in mlx5_ib the internal net device management for retrieving net devices was replaced by a common interface ib_device_get_netdev() in commit 8d159eb2117b ("RDMA/mlx5: Use IB set_netdev and get_netdev functions"). Since such direct accesses to the internal net device management is not recommended at all, update the SMC-R code to use proper API ib_device_get_netdev(). Fixes: 54903572c23c ("net/smc: allow pnetid-less configuration") Reported-by: Aswin K Reviewed-by: Gerd Bayer Reviewed-by: Halil Pasic Reviewed-by: Simon Horman Reviewed-by: Dust Li Reviewed-by: Wen Gu Reviewed-by: Zhu Yanjun Reviewed-by: D. Wythe Signed-off-by: Wenjia Zhang Reviewed-by: Leon Romanovsky Link: https://patch.msgid.link/20241106082612.57803-1-wenjia@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_ib.c | 8 ++------ net/smc/smc_pnet.c | 4 +--- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9297dc20bfe2..9c563cdbea90 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -899,9 +899,7 @@ static void smc_copy_netdev_ifindex(struct smc_ib_device *smcibdev, int port) struct ib_device *ibdev = smcibdev->ibdev; struct net_device *ndev; - if (!ibdev->ops.get_netdev) - return; - ndev = ibdev->ops.get_netdev(ibdev, port + 1); + ndev = ib_device_get_netdev(ibdev, port + 1); if (ndev) { smcibdev->ndev_ifidx[port] = ndev->ifindex; dev_put(ndev); @@ -921,9 +919,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) port_cnt = smcibdev->ibdev->phys_port_cnt; for (i = 0; i < min_t(size_t, port_cnt, SMC_MAX_PORTS); i++) { libdev = smcibdev->ibdev; - if (!libdev->ops.get_netdev) - continue; - lndev = libdev->ops.get_netdev(libdev, i + 1); + lndev = ib_device_get_netdev(libdev, i + 1); dev_put(lndev); if (lndev != ndev) continue; diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index a04aa0e882f8..716808f374a8 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1054,9 +1054,7 @@ static void smc_pnet_find_rdma_dev(struct net_device *netdev, for (i = 1; i <= SMC_MAX_PORTS; i++) { if (!rdma_is_port_valid(ibdev->ibdev, i)) continue; - if (!ibdev->ibdev->ops.get_netdev) - continue; - ndev = ibdev->ibdev->ops.get_netdev(ibdev->ibdev, i); + ndev = ib_device_get_netdev(ibdev->ibdev, i); if (!ndev) continue; dev_put(ndev); -- 2.51.0 From fc9de52de38f656399d2ce40f7349a6b5f86e787 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 6 Nov 2024 13:03:22 +0000 Subject: [PATCH 04/16] rxrpc: Fix missing locking causing hanging calls If a call gets aborted (e.g. because kafs saw a signal) between it being queued for connection and the I/O thread picking up the call, the abort will be prioritised over the connection and it will be removed from local->new_client_calls by rxrpc_disconnect_client_call() without a lock being held. This may cause other calls on the list to disappear if a race occurs. Fix this by taking the client_call_lock when removing a call from whatever list its ->wait_link happens to be on. Signed-off-by: David Howells cc: linux-afs@lists.infradead.org Reported-by: Marc Dionne Fixes: 9d35d880e0e4 ("rxrpc: Move client call connection to the I/O thread") Link: https://patch.msgid.link/726660.1730898202@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/conn_client.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index a1b126a6b0d7..cc22596c7250 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -287,6 +287,7 @@ EM(rxrpc_call_see_input, "SEE input ") \ EM(rxrpc_call_see_release, "SEE release ") \ EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ + EM(rxrpc_call_see_waiting_call, "SEE q-conn ") \ E_(rxrpc_call_see_zap, "SEE zap ") #define rxrpc_txqueue_traces \ diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index d25bf1cf3670..bb11e8289d6d 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -516,6 +516,7 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local) spin_lock(&local->client_call_lock); list_move_tail(&call->wait_link, &bundle->waiting_calls); + rxrpc_see_call(call, rxrpc_call_see_waiting_call); spin_unlock(&local->client_call_lock); if (rxrpc_bundle_has_space(bundle)) @@ -586,7 +587,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call _debug("call is waiting"); ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + /* May still be on ->new_client_calls. */ + spin_lock(&local->client_call_lock); list_del_init(&call->wait_link); + spin_unlock(&local->client_call_lock); return; } -- 2.51.0 From d293958a8595ba566fb90b99da4d6263e14fee15 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2024 22:19:22 +0000 Subject: [PATCH 05/16] net/smc: do not leave a dangling sk pointer in __smc_create() Thanks to commit 4bbd360a5084 ("socket: Print pf->create() when it does not clear sock->sk on failure."), syzbot found an issue with AF_SMC: smc_create must clear sock->sk on failure, family: 43, type: 1, protocol: 0 WARNING: CPU: 0 PID: 5827 at net/socket.c:1565 __sock_create+0x96f/0xa30 net/socket.c:1563 Modules linked in: CPU: 0 UID: 0 PID: 5827 Comm: syz-executor259 Not tainted 6.12.0-rc6-next-20241106-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 RIP: 0010:__sock_create+0x96f/0xa30 net/socket.c:1563 Code: 03 00 74 08 4c 89 e7 e8 4f 3b 85 f8 49 8b 34 24 48 c7 c7 40 89 0c 8d 8b 54 24 04 8b 4c 24 0c 44 8b 44 24 08 e8 32 78 db f7 90 <0f> 0b 90 90 e9 d3 fd ff ff 89 e9 80 e1 07 fe c1 38 c1 0f 8c ee f7 RSP: 0018:ffffc90003e4fda0 EFLAGS: 00010246 RAX: 099c6f938c7f4700 RBX: 1ffffffff1a595fd RCX: ffff888034823c00 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: 00000000ffffffe9 R08: ffffffff81567052 R09: 1ffff920007c9f50 R10: dffffc0000000000 R11: fffff520007c9f51 R12: ffffffff8d2cafe8 R13: 1ffffffff1a595fe R14: ffffffff9a789c40 R15: ffff8880764298c0 FS: 000055557b518380(0000) GS:ffff8880b8600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa62ff43225 CR3: 0000000031628000 CR4: 00000000003526f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: sock_create net/socket.c:1616 [inline] __sys_socket_create net/socket.c:1653 [inline] __sys_socket+0x150/0x3c0 net/socket.c:1700 __do_sys_socket net/socket.c:1714 [inline] __se_sys_socket net/socket.c:1712 [inline] For reference, see commit 2d859aff775d ("Merge branch 'do-not-leave-dangling-sk-pointers-in-pf-create-functions'") Fixes: d25a92ccae6b ("net/smc: Introduce IPPROTO_SMC") Signed-off-by: Eric Dumazet Cc: Ignat Korchagin Cc: D. Wythe Cc: Dust Li Reviewed-by: Kuniyuki Iwashima Reviewed-by: Wenjia Zhang Link: https://patch.msgid.link/20241106221922.1544045-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/smc/af_smc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0316217b7687..9d76e902fd77 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3359,8 +3359,10 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, else rc = smc_create_clcsk(net, sk, family); - if (rc) + if (rc) { sk_common_release(sk); + sock->sk = NULL; + } out: return rc; } -- 2.51.0 From 71712cf519faeed529549a79559c06c7fc250a15 Mon Sep 17 00:00:00 2001 From: Wentao Liang Date: Thu, 7 Nov 2024 10:17:56 +0800 Subject: [PATCH 06/16] drivers: net: ionic: add missed debugfs cleanup to ionic_probe() error path The ionic_setup_one() creates a debugfs entry for ionic upon successful execution. However, the ionic_probe() does not release the dentry before returning, resulting in a memory leak. To fix this bug, we add the ionic_debugfs_del_dev() to release the resources in a timely manner before returning. Fixes: 0de38d9f1dba ("ionic: extract common bits from ionic_probe") Signed-off-by: Wentao Liang Acked-by: Shannon Nelson Link: https://patch.msgid.link/20241107021756.1677-1-liangwentao@iscas.ac.cn Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c index b93791d6b593..f5dc876eb500 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c @@ -394,6 +394,7 @@ err_out_free_irqs: err_out_pci: ionic_dev_teardown(ionic); ionic_clear_pci(ionic); + ionic_debugfs_del_dev(ionic); err_out: mutex_destroy(&ionic->dev_cmd_lock); ionic_devlink_free(ionic); -- 2.51.0 From 9c477088b60dd23f3120b2b01b14d85047a033da Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:19:59 +0100 Subject: [PATCH 07/16] net: phy: make genphy_c45_write_eee_adv() static genphy_c45_write_eee_adv() isn't used outside phy-c45.c, so make it static. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/d23bd784-44e6-4a15-af3a-b37379156521@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 3 ++- include/linux/phy.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 8c4dd94c7495..66be5832ba51 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -680,7 +680,8 @@ EXPORT_SYMBOL_GPL(genphy_c45_read_mdix); * @phydev: target phy_device struct * @adv: the linkmode advertisement settings */ -int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv) +static int genphy_c45_write_eee_adv(struct phy_device *phydev, + unsigned long *adv) { int val, changed = 0; diff --git a/include/linux/phy.h b/include/linux/phy.h index bf0eb4e5d35c..bb09a5a128e1 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1953,7 +1953,6 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data); -int genphy_c45_write_eee_adv(struct phy_device *phydev, unsigned long *adv); int genphy_c45_an_config_eee_aneg(struct phy_device *phydev); int genphy_c45_read_eee_adv(struct phy_device *phydev, unsigned long *adv); -- 2.51.0 From bcfb95c9898a2f8e3c94ea7c37a55b11739b03fa Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:21:11 +0100 Subject: [PATCH 08/16] net: phy: export genphy_c45_an_config_eee_aneg We'll use this function in bcm_config_lre_aneg(), therefore export it. Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/02bd7c39-7413-4433-bafc-a276089bd292@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 66be5832ba51..c1b3576c307f 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -951,6 +951,7 @@ int genphy_c45_an_config_eee_aneg(struct phy_device *phydev) return genphy_c45_write_eee_adv(phydev, phydev->advertising_eee); } +EXPORT_SYMBOL_GPL(genphy_c45_an_config_eee_aneg); /** * genphy_c45_pma_baset1_read_abilities - read supported baset1 link modes from PMA -- 2.51.0 From 3cc97d2fa9876d30ff5a2665211cf11daf01beeb Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:24:18 +0100 Subject: [PATCH 09/16] net: phy: broadcom: use genphy_c45_an_config_eee_aneg in bcm_config_lre_aneg bcm_config_lre_aneg() is the only user of genphy_config_eee_advert(), therefore use genphy_c45_an_config_eee_aneg() instead. The resulting functionality is equivalent, and bcm_config_lre_aneg() follows the structure of __genphy_config_aneg(). In a follow-up step genphy_config_eee_advert() can be removed. Note: We preserve the current behavior to ignore errors. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/6e5cd4ab-28bb-4d82-b449-fec85f3d1e8a@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm-phy-lib.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c index 59c2c6acc134..5198d66dbbc0 100644 --- a/drivers/net/phy/bcm-phy-lib.c +++ b/drivers/net/phy/bcm-phy-lib.c @@ -1136,7 +1136,7 @@ int bcm_config_lre_aneg(struct phy_device *phydev, bool changed) { int err; - if (genphy_config_eee_advert(phydev)) + if (genphy_c45_an_config_eee_aneg(phydev) > 0) changed = true; err = bcm_setup_lre_master_slave(phydev); -- 2.51.0 From db73835f54fc57bc267d43836905588f24ddac85 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 6 Nov 2024 21:25:12 +0100 Subject: [PATCH 10/16] net: phy: remove genphy_config_eee_advert bcm_config_lre_aneg() doesn't use genphy_config_eee_advert() any longer. As this was the only user, we can remove genphy_config_eee_advert() now. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Link: https://patch.msgid.link/37da7f3e-b883-4c07-9881-b8c0516822b7@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 23 ----------------------- include/linux/phy.h | 1 - 2 files changed, 24 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 563497a3274c..bc24c9f2786b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -2239,29 +2239,6 @@ static int genphy_c37_config_advert(struct phy_device *phydev) adv); } -/** - * genphy_config_eee_advert - disable unwanted eee mode advertisement - * @phydev: target phy_device struct - * - * Description: Writes MDIO_AN_EEE_ADV after disabling unsupported energy - * efficent ethernet modes. Returns 0 if the PHY's advertisement hasn't - * changed, and 1 if it has changed. - */ -int genphy_config_eee_advert(struct phy_device *phydev) -{ - int err; - - /* Nothing to disable */ - if (!phydev->eee_broken_modes) - return 0; - - err = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, - phydev->eee_broken_modes, 0); - /* If the call failed, we assume that EEE is not supported */ - return err < 0 ? 0 : err; -} -EXPORT_SYMBOL(genphy_config_eee_advert); - /** * genphy_setup_forced - configures/forces speed/duplex from @phydev * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index bb09a5a128e1..1e4127c495c0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1885,7 +1885,6 @@ int genphy_read_abilities(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart); -int genphy_config_eee_advert(struct phy_device *phydev); int __genphy_config_aneg(struct phy_device *phydev, bool changed); int genphy_aneg_done(struct phy_device *phydev); int genphy_update_link(struct phy_device *phydev); -- 2.51.0 From 48171c65f61148b0025128b70837280123f1309d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 6 Nov 2024 22:37:32 +0100 Subject: [PATCH 11/16] ipv4: Prepare ip_route_output() to future .flowi4_tos conversion. Convert the "tos" parameter of ip_route_output() to dscp_t. This way we'll have a dscp_t value directly available when .flowi4_tos will eventually be converted to dscp_t. All ip_route_output() callers but one set this "tos" parameter to 0 and therefore don't need to be adapted to the new prototype. Only br_nf_pre_routing_finish() needs conversion. It can just use ip4h_dscp() to get the DSCP field from the IPv4 header. Signed-off-by: Guillaume Nault Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/0f10d031dd44c70aae9bc6e19391cb30d5c2fe71.1730928699.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 6 +++--- net/bridge/br_netfilter_hooks.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 586e59f7ed8a..0a690adfdff5 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -156,12 +156,12 @@ static inline struct rtable *ip_route_output_key(struct net *net, struct flowi4 * structure is only partially set, it may bypass some fib-rules. */ static inline struct rtable *ip_route_output(struct net *net, __be32 daddr, - __be32 saddr, u8 tos, int oif, - __u8 scope) + __be32 saddr, dscp_t dscp, + int oif, __u8 scope) { struct flowi4 fl4 = { .flowi4_oif = oif, - .flowi4_tos = tos, + .flowi4_tos = inet_dscp_to_dsfield(dscp), .flowi4_scope = scope, .daddr = daddr, .saddr = saddr, diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 17a5f5923d61..7f2f40cef5fe 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -406,7 +406,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ goto free_skb; rt = ip_route_output(net, iph->daddr, 0, - iph->tos & INET_DSCP_MASK, 0, + ip4h_dscp(iph), 0, RT_SCOPE_UNIVERSE); if (!IS_ERR(rt)) { /* - Bridged-and-DNAT'ed traffic doesn't -- 2.51.0 From 38a1f50a5efb5a941f3491d4d2353d12a87d04a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Nov 2024 13:18:17 +0000 Subject: [PATCH 12/16] phonet: do not call synchronize_rcu() from phonet_route_del() Calling synchronize_rcu() while holding rcu_read_lock() is not permitted [1] Move the synchronize_rcu() + dev_put() to route_doit(). Alternative would be to not use rcu_read_lock() in route_doit(). [1] WARNING: suspicious RCU usage 6.12.0-rc5-syzkaller-01056-gf07a6e6ceb05 #0 Not tainted ----------------------------- kernel/rcu/tree.c:4092 Illegal synchronize_rcu() in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 1 lock held by syz-executor427/5840: #0: ffffffff8e937da0 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire include/linux/rcupdate.h:337 [inline] #0: ffffffff8e937da0 (rcu_read_lock){....}-{1:2}, at: rcu_read_lock include/linux/rcupdate.h:849 [inline] #0: ffffffff8e937da0 (rcu_read_lock){....}-{1:2}, at: route_doit+0x3d6/0x640 net/phonet/pn_netlink.c:264 stack backtrace: CPU: 1 UID: 0 PID: 5840 Comm: syz-executor427 Not tainted 6.12.0-rc5-syzkaller-01056-gf07a6e6ceb05 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 lockdep_rcu_suspicious+0x226/0x340 kernel/locking/lockdep.c:6821 synchronize_rcu+0xea/0x360 kernel/rcu/tree.c:4089 phonet_route_del+0xc6/0x140 net/phonet/pn_dev.c:409 route_doit+0x514/0x640 net/phonet/pn_netlink.c:275 rtnetlink_rcv_msg+0x791/0xcf0 net/core/rtnetlink.c:6790 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2551 netlink_unicast_kernel net/netlink/af_netlink.c:1331 [inline] netlink_unicast+0x7f6/0x990 net/netlink/af_netlink.c:1357 netlink_sendmsg+0x8e4/0xcb0 net/netlink/af_netlink.c:1901 sock_sendmsg_nosec net/socket.c:729 [inline] __sock_sendmsg+0x221/0x270 net/socket.c:744 sock_write_iter+0x2d7/0x3f0 net/socket.c:1165 new_sync_write fs/read_write.c:590 [inline] vfs_write+0xaeb/0xd30 fs/read_write.c:683 ksys_write+0x183/0x2b0 fs/read_write.c:736 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f Fixes: 17a1ac0018ae ("phonet: Don't hold RTNL for route_doit().") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Cc: Remi Denis-Courmont Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20241106131818.1240710-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/phonet/pn_dev.c | 5 +++-- net/phonet/pn_netlink.c | 12 ++++++++++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 19234d664c4f..5c36bae37b8f 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -406,8 +406,9 @@ int phonet_route_del(struct net_device *dev, u8 daddr) if (!dev) return -ENOENT; - synchronize_rcu(); - dev_put(dev); + + /* Note : our caller must call synchronize_rcu() and dev_put(dev) */ + return 0; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index ca1f04e4a2d9..b9043c92dc24 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -233,6 +233,7 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct nlattr *tb[RTA_MAX+1]; + bool sync_needed = false; struct net_device *dev; struct rtmsg *rtm; u32 ifindex; @@ -269,13 +270,20 @@ static int route_doit(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (nlh->nlmsg_type == RTM_NEWROUTE) + if (nlh->nlmsg_type == RTM_NEWROUTE) { err = phonet_route_add(dev, dst); - else + } else { err = phonet_route_del(dev, dst); + if (!err) + sync_needed = true; + } rcu_read_unlock(); + if (sync_needed) { + synchronize_rcu(); + dev_put(dev); + } if (!err) rtm_phonet_notify(net, nlh->nlmsg_type, ifindex, dst); -- 2.51.0 From bc515ed06652b506794f323b9e53d89c01de7e4a Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Nov 2024 09:07:17 +0000 Subject: [PATCH 13/16] netlink: specs: Add a spec for neighbor tables in rtnetlink Add a YNL spec for neighbour tables and neighbour entries in rtnetlink. ./tools/net/ynl/cli.py \ --spec Documentation/netlink/specs/rt_neigh.yaml \ --dump getneigh [{'cacheinfo': {'confirmed': 122664055, 'refcnt': 0, 'updated': 122658055, 'used': 122658055}, 'dst': '0.0.0.0', 'family': 2, 'flags': set(), 'ifindex': 5, 'lladr': '', 'probes': 0, 'state': {'noarp'}, 'type': 'broadcast'}, ...] Acked-by: Stanislav Fomichev Signed-off-by: Donald Hunter Link: https://patch.msgid.link/20241106090718.64713-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_neigh.yaml | 442 ++++++++++++++++++++++ 1 file changed, 442 insertions(+) create mode 100644 Documentation/netlink/specs/rt_neigh.yaml diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt_neigh.yaml new file mode 100644 index 000000000000..e670b6dc07be --- /dev/null +++ b/Documentation/netlink/specs/rt_neigh.yaml @@ -0,0 +1,442 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: rt-neigh +protocol: netlink-raw +protonum: 0 + +doc: + IP neighbour management over rtnetlink. + +definitions: + - + name: ndmsg + type: struct + members: + - + name: family + type: u8 + - + name: pad + type: pad + len: 3 + - + name: ifindex + type: s32 + - + name: state + type: u16 + enum: nud-state + - + name: flags + type: u8 + enum: ntf-flags + - + name: type + type: u8 + enum: rtm-type + - + name: ndtmsg + type: struct + members: + - + name: family + type: u8 + - + name: pad + type: pad + len: 3 + - + name: nud-state + type: flags + entries: + - incomplete + - reachable + - stale + - delay + - probe + - failed + - noarp + - permanent + - + name: ntf-flags + type: flags + entries: + - use + - self + - master + - proxy + - ext-learned + - offloaded + - sticky + - router + - + name: ntf-ext-flags + type: flags + entries: + - managed + - locked + - + name: rtm-type + type: enum + entries: + - unspec + - unicast + - local + - broadcast + - anycast + - multicast + - blackhole + - unreachable + - prohibit + - throw + - nat + - xresolve + - + name: nda-cacheinfo + type: struct + members: + - + name: confirmed + type: u32 + - + name: used + type: u32 + - + name: updated + type: u32 + - + name: refcnt + type: u32 + - + name: ndt-config + type: struct + members: + - + name: key-len + type: u16 + - + name: entry-size + type: u16 + - + name: entries + type: u32 + - + name: last-flush + type: u32 + - + name: last-rand + type: u32 + - + name: hash-rnd + type: u32 + - + name: hash-mask + type: u32 + - + name: hash-chain-gc + type: u32 + - + name: proxy-qlen + type: u32 + - + name: ndt-stats + type: struct + members: + - + name: allocs + type: u64 + - + name: destroys + type: u64 + - + name: hash-grows + type: u64 + - + name: res-failed + type: u64 + - + name: lookups + type: u64 + - + name: hits + type: u64 + - + name: rcv-probes-mcast + type: u64 + - + name: rcv-probes-ucast + type: u64 + - + name: periodic-gc-runs + type: u64 + - + name: forced-gc-runs + type: u64 + - + name: table-fulls + type: u64 + +attribute-sets: + - + name: neighbour-attrs + attributes: + - + name: unspec + type: binary + value: 0 + - + name: dst + type: binary + display-hint: ipv4 + - + name: lladr + type: binary + display-hint: mac + - + name: cacheinfo + type: binary + struct: nda-cacheinfo + - + name: probes + type: u32 + - + name: vlan + type: u16 + - + name: port + type: u16 + - + name: vni + type: u32 + - + name: ifindex + type: u32 + - + name: master + type: u32 + - + name: link-netnsid + type: s32 + - + name: src-vni + type: u32 + - + name: protocol + type: u8 + - + name: nh-id + type: u32 + - + name: fdb-ext-attrs + type: binary + - + name: flags-ext + type: u32 + enum: ntf-ext-flags + - + name: ndm-state-mask + type: u16 + - + name: ndm-flags-mask + type: u8 + - + name: ndt-attrs + attributes: + - + name: name + type: string + - + name: thresh1 + type: u32 + - + name: thresh2 + type: u32 + - + name: thresh3 + type: u32 + - + name: config + type: binary + struct: ndt-config + - + name: parms + type: nest + nested-attributes: ndtpa-attrs + - + name: stats + type: binary + struct: ndt-stats + - + name: gc-interval + type: u64 + - + name: pad + type: pad + - + name: ndtpa-attrs + attributes: + - + name: ifindex + type: u32 + - + name: refcnt + type: u32 + - + name: reachable-time + type: u64 + - + name: base-reachable-time + type: u64 + - + name: retrans-time + type: u64 + - + name: gc-staletime + type: u64 + - + name: delay-probe-time + type: u64 + - + name: queue-len + type: u32 + - + name: app-probes + type: u32 + - + name: ucast-probes + type: u32 + - + name: mcast-probes + type: u32 + - + name: anycast-delay + type: u64 + - + name: proxy-delay + type: u64 + - + name: proxy-qlen + type: u32 + - + name: locktime + type: u64 + - + name: queue-lenbytes + type: u32 + - + name: mcast-reprobes + type: u32 + - + name: pad + type: pad + - + name: interval-probe-time-ms + type: u64 + +operations: + enum-model: directional + list: + - + name: newneigh + doc: Add new neighbour entry + fixed-header: ndmsg + attribute-set: neighbour-attrs + do: + request: + value: 28 + attributes: &neighbour-all + - dst + - lladdr + - probes + - vlan + - port + - vni + - ifindex + - master + - protocol + - nh-id + - flags-ext + - fdb-ext-attrs + - + name: delneigh + doc: Remove an existing neighbour entry + fixed-header: ndmsg + attribute-set: neighbour-attrs + do: + request: + value: 29 + attributes: + - dst + - ifindex + - + name: delneigh-ntf + doc: Notify a neighbour deletion + value: 29 + notify: delneigh + fixed-header: ndmsg + - + name: getneigh + doc: Get or dump neighbour entries + fixed-header: ndmsg + attribute-set: neighbour-attrs + do: + request: + value: 30 + attributes: + - dst + reply: + value: 28 + attributes: *neighbour-all + dump: + request: + attributes: + - ifindex + - master + reply: + attributes: *neighbour-all + - + name: newneigh-ntf + doc: Notify a neighbour creation + value: 28 + notify: getneigh + fixed-header: ndmsg + - + name: getneightbl + doc: Get or dump neighbour tables + fixed-header: ndtmsg + attribute-set: ndt-attrs + dump: + request: + value: 66 + reply: + value: 64 + attributes: + - name + - thresh1 + - thresh2 + - thresh3 + - config + - parms + - stats + - gc-interval + - + name: setneightbl + doc: Set neighbour tables + fixed-header: ndtmsg + attribute-set: ndt-attrs + do: + request: + value: 67 + attributes: + - name + - thresh1 + - thresh2 + - thresh3 + - parms + - gc-interval + +mcast-groups: + list: + - + name: rtnlgrp-neigh + value: 3 -- 2.51.0 From a852e3c356415f61d9329cb3a1a4c90c74570522 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Wed, 6 Nov 2024 09:07:18 +0000 Subject: [PATCH 14/16] netlink: specs: Add a spec for FIB rule management Add a YNL spec for FIB rules: ./tools/net/ynl/cli.py \ --spec Documentation/netlink/specs/rt_rule.yaml \ --dump getrule --json '{"family": 2}' [{'action': 'to-tbl', 'dst-len': 0, 'family': 2, 'flags': 0, 'protocol': 2, 'src-len': 0, 'suppress-prefixlen': '0xffffffff', 'table': 255, 'tos': 0}, ... ] Acked-by: Stanislav Fomichev Reviewed-by: Ido Schimmel Signed-off-by: Donald Hunter Link: https://patch.msgid.link/20241106090718.64713-3-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_rule.yaml | 242 +++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 Documentation/netlink/specs/rt_rule.yaml diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml new file mode 100644 index 000000000000..03a8eef7952e --- /dev/null +++ b/Documentation/netlink/specs/rt_rule.yaml @@ -0,0 +1,242 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: rt-rule +protocol: netlink-raw +protonum: 0 + +doc: + FIB rule management over rtnetlink. + +definitions: + - + name: rtgenmsg + type: struct + members: + - + name: family + type: u8 + - + name: pad + type: pad + len: 3 + - + name: fib-rule-hdr + type: struct + members: + - + name: family + type: u8 + - + name: dst-len + type: u8 + - + name: src-len + type: u8 + - + name: tos + type: u8 + - + name: table + type: u8 + - + name: res1 + type: pad + len: 1 + - + name: res2 + type: pad + len: 1 + - + name: action + type: u8 + enum: fr-act + - + name: flags + type: u32 + - + name: fr-act + type: enum + entries: + - unspec + - to-tbl + - goto + - nop + - res3 + - res4 + - blackhole + - unreachable + - prohibit + - + name: fib-rule-port-range + type: struct + members: + - + name: start + type: u16 + - + name: end + type: u16 + - + name: fib-rule-uid-range + type: struct + members: + - + name: start + type: u32 + - + name: end + type: u32 + +attribute-sets: + - + name: fib-rule-attrs + attributes: + - + name: dst + type: u32 + - + name: src + type: u32 + - + name: iifname + type: string + - + name: goto + type: u32 + - + name: unused2 + type: pad + - + name: priority + type: u32 + - + name: unused3 + type: pad + - + name: unused4 + type: pad + - + name: unused5 + type: pad + - + name: fwmark + type: u32 + display-hint: hex + - + name: flow + type: u32 + - + name: tun-id + type: u64 + - + name: suppress-ifgroup + type: u32 + - + name: suppress-prefixlen + type: u32 + display-hint: hex + - + name: table + type: u32 + - + name: fwmask + type: u32 + display-hint: hex + - + name: oifname + type: string + - + name: pad + type: pad + - + name: l3mdev + type: u8 + - + name: uid-range + type: binary + struct: fib-rule-uid-range + - + name: protocol + type: u8 + - + name: ip-proto + type: u8 + - + name: sport-range + type: binary + struct: fib-rule-port-range + - + name: dport-range + type: binary + struct: fib-rule-port-range + - + name: dscp + type: u8 + +operations: + enum-model: directional + fixed-header: fib-rule-hdr + list: + - + name: newrule + doc: Add new FIB rule + attribute-set: fib-rule-attrs + do: + request: + value: 32 + attributes: &fib-rule-all + - iifname + - oifname + - priority + - fwmark + - flow + - tun-id + - fwmask + - table + - suppress-prefixlen + - suppress-ifgroup + - goto + - l3mdev + - uid-range + - protocol + - ip-proto + - sport-range + - dport-range + - dscp + - + name: newrule-ntf + doc: Notify a rule creation + value: 32 + notify: newrule + - + name: delrule + doc: Remove an existing FIB rule + attribute-set: fib-rule-attrs + do: + request: + value: 33 + attributes: *fib-rule-all + - + name: delrule-ntf + doc: Notify a rule deletion + value: 33 + notify: delrule + - + name: getrule + doc: Dump all FIB rules + attribute-set: fib-rule-attrs + dump: + request: + value: 34 + reply: + value: 32 + attributes: *fib-rule-all + +mcast-groups: + list: + - + name: rtnlgrp-ipv4-rule + value: 8 + - + name: rtnlgrp-ipv6-rule + value: 19 -- 2.51.0 From 4861333b42178fa3d8fd1bb4e2cfb2fedc968dba Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 5 Nov 2024 21:27:21 +0200 Subject: [PATCH 15/16] bonding: add ESP offload features when slaves support Add NETIF_F_GSO_ESP bit to bond's gso_partial_features if all slaves support it, such that ESP segmentation is handled by hardware if possible. Signed-off-by: Jianbo Liu Reviewed-by: Boris Pismenny Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241105192721.584822-1-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 5812e8eaccf1..9e8bdd0d0922 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1545,6 +1545,7 @@ static void bond_compute_features(struct bonding *bond) { unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; + netdev_features_t gso_partial_features = NETIF_F_GSO_ESP; netdev_features_t vlan_features = BOND_VLAN_FEATURES; netdev_features_t enc_features = BOND_ENC_FEATURES; #ifdef CONFIG_XFRM_OFFLOAD @@ -1577,6 +1578,9 @@ static void bond_compute_features(struct bonding *bond) BOND_XFRM_FEATURES); #endif /* CONFIG_XFRM_OFFLOAD */ + if (slave->dev->hw_enc_features & NETIF_F_GSO_PARTIAL) + gso_partial_features &= slave->dev->gso_partial_features; + mpls_features = netdev_increment_features(mpls_features, slave->dev->mpls_features, BOND_MPLS_FEATURES); @@ -1590,6 +1594,11 @@ static void bond_compute_features(struct bonding *bond) } bond_dev->hard_header_len = max_hard_header_len; + if (gso_partial_features & NETIF_F_GSO_ESP) + bond_dev->gso_partial_features |= NETIF_F_GSO_ESP; + else + bond_dev->gso_partial_features &= ~NETIF_F_GSO_ESP; + done: bond_dev->vlan_features = vlan_features; bond_dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL | -- 2.51.0 From 580db513b4a9d52f306580015a1872eea0a0894e Mon Sep 17 00:00:00 2001 From: Khang Nguyen Date: Tue, 5 Nov 2024 14:19:15 +0700 Subject: [PATCH 16/16] net: mctp: Expose transport binding identifier via IFLA attribute MCTP control protocol implementations are transport binding dependent. Endpoint discovery is mandatory based on transport binding. Message timing requirements are specified in each respective transport binding specification. However, we currently have no means to get this information from MCTP links. Add a IFLA_MCTP_PHYS_BINDING netlink link attribute, which represents the transport type using the DMTF DSP0239-defined type numbers, returned as part of RTM_GETLINK data. We get an IFLA_MCTP_PHYS_BINDING attribute for each MCTP link, for example: - 0x00 (unspec) for loopback interface; - 0x01 (SMBus/I2C) for mctpi2c%d interfaces; and - 0x05 (serial) for mctpserial%d interfaces. Signed-off-by: Khang Nguyen Reviewed-by: Matt Johnston Link: https://patch.msgid.link/20241105071915.821871-1-khangng@os.amperecomputing.com Signed-off-by: Jakub Kicinski --- drivers/net/mctp/mctp-i2c.c | 3 ++- drivers/net/mctp/mctp-i3c.c | 2 +- drivers/net/mctp/mctp-serial.c | 5 +++-- include/net/mctp.h | 18 ++++++++++++++++++ include/net/mctpdevice.h | 4 +++- include/uapi/linux/if_link.h | 1 + net/mctp/device.c | 12 +++++++++--- 7 files changed, 37 insertions(+), 8 deletions(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index e70fb6687994..d2b3f5a59141 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -880,7 +880,8 @@ static int mctp_i2c_add_netdev(struct mctp_i2c_client *mcli, goto err; } - rc = mctp_register_netdev(ndev, &mctp_i2c_mctp_ops); + rc = mctp_register_netdev(ndev, &mctp_i2c_mctp_ops, + MCTP_PHYS_BINDING_SMBUS); if (rc < 0) { dev_err(&mcli->client->dev, "register netdev \"%s\" failed %d\n", diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c index 1bc87a062686..9adad59b8676 100644 --- a/drivers/net/mctp/mctp-i3c.c +++ b/drivers/net/mctp/mctp-i3c.c @@ -607,7 +607,7 @@ __must_hold(&busdevs_lock) goto err_free_uninit; } - rc = mctp_register_netdev(ndev, NULL); + rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_I3C); if (rc < 0) { dev_warn(&ndev->dev, "netdev register failed: %d\n", rc); goto err_free_netdev; diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c index e63720ec3238..26c9a33fd636 100644 --- a/drivers/net/mctp/mctp-serial.c +++ b/drivers/net/mctp/mctp-serial.c @@ -23,6 +23,7 @@ #include #include +#include #include #define MCTP_SERIAL_MTU 68 /* base mtu (64) + mctp header */ @@ -470,7 +471,7 @@ static int mctp_serial_open(struct tty_struct *tty) spin_lock_init(&dev->lock); INIT_WORK(&dev->tx_work, mctp_serial_tx_work); - rc = register_netdev(ndev); + rc = mctp_register_netdev(ndev, NULL, MCTP_PHYS_BINDING_SERIAL); if (rc) goto free_netdev; @@ -492,7 +493,7 @@ static void mctp_serial_close(struct tty_struct *tty) struct mctp_serial *dev = tty->disc_data; int idx = dev->idx; - unregister_netdev(dev->netdev); + mctp_unregister_netdev(dev->netdev); ida_free(&mctp_serial_ida, idx); } diff --git a/include/net/mctp.h b/include/net/mctp.h index 28d59ae94ca3..1ecbff7116f6 100644 --- a/include/net/mctp.h +++ b/include/net/mctp.h @@ -298,4 +298,22 @@ void mctp_routes_exit(void); int mctp_device_init(void); void mctp_device_exit(void); +/* MCTP IDs and Codes from DMTF specification + * "DSP0239 Management Component Transport Protocol (MCTP) IDs and Codes" + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0239_1.11.1.pdf + */ +enum mctp_phys_binding { + MCTP_PHYS_BINDING_UNSPEC = 0x00, + MCTP_PHYS_BINDING_SMBUS = 0x01, + MCTP_PHYS_BINDING_PCIE_VDM = 0x02, + MCTP_PHYS_BINDING_USB = 0x03, + MCTP_PHYS_BINDING_KCS = 0x04, + MCTP_PHYS_BINDING_SERIAL = 0x05, + MCTP_PHYS_BINDING_I3C = 0x06, + MCTP_PHYS_BINDING_MMBI = 0x07, + MCTP_PHYS_BINDING_PCC = 0x08, + MCTP_PHYS_BINDING_UCIE = 0x09, + MCTP_PHYS_BINDING_VENDOR = 0xFF, +}; + #endif /* __NET_MCTP_H */ diff --git a/include/net/mctpdevice.h b/include/net/mctpdevice.h index 5c0d04b5c12c..957d9ef924c5 100644 --- a/include/net/mctpdevice.h +++ b/include/net/mctpdevice.h @@ -22,6 +22,7 @@ struct mctp_dev { refcount_t refs; unsigned int net; + enum mctp_phys_binding binding; const struct mctp_netdev_ops *ops; @@ -44,7 +45,8 @@ struct mctp_dev *mctp_dev_get_rtnl(const struct net_device *dev); struct mctp_dev *__mctp_dev_get(const struct net_device *dev); int mctp_register_netdev(struct net_device *dev, - const struct mctp_netdev_ops *ops); + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding); void mctp_unregister_netdev(struct net_device *dev); void mctp_dev_hold(struct mctp_dev *mdev); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8516c1ccd57a..2575e0cd9b48 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1958,6 +1958,7 @@ struct ifla_rmnet_flags { enum { IFLA_MCTP_UNSPEC, IFLA_MCTP_NET, + IFLA_MCTP_PHYS_BINDING, __IFLA_MCTP_MAX, }; diff --git a/net/mctp/device.c b/net/mctp/device.c index 3d75b919995d..26ce34b7e88e 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -371,6 +371,8 @@ static int mctp_fill_link_af(struct sk_buff *skb, return -ENODATA; if (nla_put_u32(skb, IFLA_MCTP_NET, mdev->net)) return -EMSGSIZE; + if (nla_put_u8(skb, IFLA_MCTP_PHYS_BINDING, mdev->binding)) + return -EMSGSIZE; return 0; } @@ -385,6 +387,7 @@ static size_t mctp_get_link_af_size(const struct net_device *dev, if (!mdev) return 0; ret = nla_total_size(4); /* IFLA_MCTP_NET */ + ret += nla_total_size(1); /* IFLA_MCTP_PHYS_BINDING */ mctp_dev_put(mdev); return ret; } @@ -480,7 +483,8 @@ static int mctp_dev_notify(struct notifier_block *this, unsigned long event, } static int mctp_register_netdevice(struct net_device *dev, - const struct mctp_netdev_ops *ops) + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding) { struct mctp_dev *mdev; @@ -489,17 +493,19 @@ static int mctp_register_netdevice(struct net_device *dev, return PTR_ERR(mdev); mdev->ops = ops; + mdev->binding = binding; return register_netdevice(dev); } int mctp_register_netdev(struct net_device *dev, - const struct mctp_netdev_ops *ops) + const struct mctp_netdev_ops *ops, + enum mctp_phys_binding binding) { int rc; rtnl_lock(); - rc = mctp_register_netdevice(dev, ops); + rc = mctp_register_netdevice(dev, ops, binding); rtnl_unlock(); return rc; -- 2.51.0