From 0713a1b3276b98c7dafbeefef00d7bc3a9119a84 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 2 May 2025 16:13:46 +0200 Subject: [PATCH 01/16] can: mcan: m_can_class_unregister(): fix order of unregistration calls If a driver is removed, the driver framework invokes the driver's remove callback. A CAN driver's remove function calls unregister_candev(), which calls net_device_ops::ndo_stop further down in the call stack for interfaces which are in the "up" state. The removal of the module causes a warning, as can_rx_offload_del() deletes the NAPI, while it is still active, because the interface is still up. To fix the warning, first unregister the network interface, which calls net_device_ops::ndo_stop, which disables the NAPI, and then call can_rx_offload_del(). Fixes: 1be37d3b0414 ("can: m_can: fix periph RX path: use rx-offload to ensure skbs are sent from softirq context") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/20250502-can-rx-offload-del-v1-3-59a9b131589d@pengutronix.de Reviewed-by: Markus Schneider-Pargmann Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 326ede9d400f..c2c116ce1087 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -2463,9 +2463,9 @@ EXPORT_SYMBOL_GPL(m_can_class_register); void m_can_class_unregister(struct m_can_classdev *cdev) { + unregister_candev(cdev->net); if (cdev->is_peripheral) can_rx_offload_del(&cdev->offload); - unregister_candev(cdev->net); } EXPORT_SYMBOL_GPL(m_can_class_unregister); -- 2.50.1 From 511e64e13d8cc72853275832e3f372607466c18c Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 29 Apr 2025 09:05:55 +0200 Subject: [PATCH 02/16] can: gw: fix RCU/BH usage in cgw_create_job() As reported by Sebastian Andrzej Siewior the use of local_bh_disable() is only feasible in uni processor systems to update the modification rules. The usual use-case to update the modification rules is to update the data of the modifications but not the modification types (AND/OR/XOR/SET) or the checksum functions itself. To omit additional memory allocations to maintain fast modification switching times, the modification description space is doubled at gw-job creation time so that only the reference to the active modification description is changed under rcu protection. Rename cgw_job::mod to cf_mod and make it a RCU pointer. Allocate in cgw_create_job() and free it together with cgw_job in cgw_job_free_rcu(). Update all users to dereference cgw_job::cf_mod with a RCU accessor and if possible once. [bigeasy: Replace mod1/mod2 from the Oliver's original patch with dynamic allocation, use RCU annotation and accessor] Reported-by: Sebastian Andrzej Siewior Closes: https://lore.kernel.org/linux-can/20231031112349.y0aLoBrz@linutronix.de/ Fixes: dd895d7f21b2 ("can: cangw: introduce optional uid to reference created routing jobs") Tested-by: Oliver Hartkopp Signed-off-by: Oliver Hartkopp Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250429070555.cs-7b_eZ@linutronix.de Signed-off-by: Marc Kleine-Budde --- net/can/gw.c | 149 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 90 insertions(+), 59 deletions(-) diff --git a/net/can/gw.c b/net/can/gw.c index ef93293c1fae..55eccb1c7620 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -130,7 +130,7 @@ struct cgw_job { u32 handled_frames; u32 dropped_frames; u32 deleted_frames; - struct cf_mod mod; + struct cf_mod __rcu *cf_mod; union { /* CAN frame data source */ struct net_device *dev; @@ -459,6 +459,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) struct cgw_job *gwj = (struct cgw_job *)data; struct canfd_frame *cf; struct sk_buff *nskb; + struct cf_mod *mod; int modidx = 0; /* process strictly Classic CAN or CAN FD frames */ @@ -506,7 +507,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. */ - if (gwj->mod.modfunc[0]) + mod = rcu_dereference(gwj->cf_mod); + if (mod->modfunc[0]) nskb = skb_copy(skb, GFP_ATOMIC); else nskb = skb_clone(skb, GFP_ATOMIC); @@ -529,8 +531,8 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ - while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) - (*gwj->mod.modfunc[modidx++])(cf, &gwj->mod); + while (modidx < MAX_MODFUNCTIONS && mod->modfunc[modidx]) + (*mod->modfunc[modidx++])(cf, mod); /* Has the CAN frame been modified? */ if (modidx) { @@ -546,11 +548,11 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) } /* check for checksum updates */ - if (gwj->mod.csumfunc.crc8) - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (mod->csumfunc.crc8) + (*mod->csumfunc.crc8)(cf, &mod->csum.crc8); - if (gwj->mod.csumfunc.xor) - (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); + if (mod->csumfunc.xor) + (*mod->csumfunc.xor)(cf, &mod->csum.xor); } /* clear the skb timestamp if not configured the other way */ @@ -581,9 +583,20 @@ static void cgw_job_free_rcu(struct rcu_head *rcu_head) { struct cgw_job *gwj = container_of(rcu_head, struct cgw_job, rcu); + /* cgw_job::cf_mod is always accessed from the same cgw_job object within + * the same RCU read section. Once cgw_job is scheduled for removal, + * cf_mod can also be removed without mandating an additional grace period. + */ + kfree(rcu_access_pointer(gwj->cf_mod)); kmem_cache_free(cgw_cache, gwj); } +/* Return cgw_job::cf_mod with RTNL protected section */ +static struct cf_mod *cgw_job_cf_mod(struct cgw_job *gwj) +{ + return rcu_dereference_protected(gwj->cf_mod, rtnl_is_locked()); +} + static int cgw_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { @@ -616,6 +629,7 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, { struct rtcanmsg *rtcan; struct nlmsghdr *nlh; + struct cf_mod *mod; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtcan), flags); if (!nlh) @@ -650,82 +664,83 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } + mod = cgw_job_cf_mod(gwj); if (gwj->flags & CGW_FLAGS_CAN_FD) { struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } else { struct cgw_frame_mod mb; - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; + if (mod->modtype.and) { + memcpy(&mb.cf, &mod->modframe.and, sizeof(mb.cf)); + mb.modtype = mod->modtype.and; if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; + if (mod->modtype.or) { + memcpy(&mb.cf, &mod->modframe.or, sizeof(mb.cf)); + mb.modtype = mod->modtype.or; if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; + if (mod->modtype.xor) { + memcpy(&mb.cf, &mod->modframe.xor, sizeof(mb.cf)); + mb.modtype = mod->modtype.xor; if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) goto cancel; } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; + if (mod->modtype.set) { + memcpy(&mb.cf, &mod->modframe.set, sizeof(mb.cf)); + mb.modtype = mod->modtype.set; if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) goto cancel; } } - if (gwj->mod.uid) { - if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0) + if (mod->uid) { + if (nla_put_u32(skb, CGW_MOD_UID, mod->uid) < 0) goto cancel; } - if (gwj->mod.csumfunc.crc8) { + if (mod->csumfunc.crc8) { if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN, - &gwj->mod.csum.crc8) < 0) + &mod->csum.crc8) < 0) goto cancel; } - if (gwj->mod.csumfunc.xor) { + if (mod->csumfunc.xor) { if (nla_put(skb, CGW_CS_XOR, CGW_CS_XOR_LEN, - &gwj->mod.csum.xor) < 0) + &mod->csum.xor) < 0) goto cancel; } @@ -1059,7 +1074,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct rtcanmsg *r; struct cgw_job *gwj; - struct cf_mod mod; + struct cf_mod *mod; struct can_can_gw ccgw; u8 limhops = 0; int err = 0; @@ -1078,37 +1093,48 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (r->gwtype != CGW_TYPE_CAN_CAN) return -EINVAL; - err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); + mod = kmalloc(sizeof(*mod), GFP_KERNEL); + if (!mod) + return -ENOMEM; + + err = cgw_parse_attr(nlh, mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops); if (err < 0) - return err; + goto out_free_cf; - if (mod.uid) { + if (mod->uid) { ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) + struct cf_mod *old_cf; + + old_cf = cgw_job_cf_mod(gwj); + if (old_cf->uid != mod->uid) continue; /* interfaces & filters must be identical */ - if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) - return -EINVAL; + if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw))) { + err = -EINVAL; + goto out_free_cf; + } - /* update modifications with disabled softirq & quit */ - local_bh_disable(); - memcpy(&gwj->mod, &mod, sizeof(mod)); - local_bh_enable(); + rcu_assign_pointer(gwj->cf_mod, mod); + kfree_rcu_mightsleep(old_cf); return 0; } } /* ifindex == 0 is not allowed for job creation */ - if (!ccgw.src_idx || !ccgw.dst_idx) - return -ENODEV; + if (!ccgw.src_idx || !ccgw.dst_idx) { + err = -ENODEV; + goto out_free_cf; + } gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL); - if (!gwj) - return -ENOMEM; + if (!gwj) { + err = -ENOMEM; + goto out_free_cf; + } gwj->handled_frames = 0; gwj->dropped_frames = 0; @@ -1118,7 +1144,7 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, gwj->limit_hops = limhops; /* insert already parsed information */ - memcpy(&gwj->mod, &mod, sizeof(mod)); + RCU_INIT_POINTER(gwj->cf_mod, mod); memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw)); err = -ENODEV; @@ -1152,9 +1178,11 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, if (!err) hlist_add_head_rcu(&gwj->list, &net->can.cgw_list); out: - if (err) + if (err) { kmem_cache_free(cgw_cache, gwj); - +out_free_cf: + kfree(mod); + } return err; } @@ -1214,19 +1242,22 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { + struct cf_mod *cf_mod; + if (gwj->flags != r->flags) continue; if (gwj->limit_hops != limhops) continue; + cf_mod = cgw_job_cf_mod(gwj); /* we have a match when uid is enabled and identical */ - if (gwj->mod.uid || mod.uid) { - if (gwj->mod.uid != mod.uid) + if (cf_mod->uid || mod.uid) { + if (cf_mod->uid != mod.uid) continue; } else { /* no uid => check for identical modifications */ - if (memcmp(&gwj->mod, &mod, sizeof(mod))) + if (memcmp(cf_mod, &mod, sizeof(mod))) continue; } -- 2.50.1 From 023c1f2f0609218103cbcb48e0104b144d4a16dc Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Thu, 24 Apr 2025 18:01:42 +0530 Subject: [PATCH 03/16] wifi: cfg80211: fix out-of-bounds access during multi-link element defragmentation Currently during the multi-link element defragmentation process, the multi-link element length added to the total IEs length when calculating the length of remaining IEs after the multi-link element in cfg80211_defrag_mle(). This could lead to out-of-bounds access if the multi-link element or its corresponding fragment elements are the last elements in the IEs buffer. To address this issue, correctly calculate the remaining IEs length by deducting the multi-link element end offset from total IEs end offset. Cc: stable@vger.kernel.org Fixes: 2481b5da9c6b ("wifi: cfg80211: handle BSS data contained in ML probe responses") Signed-off-by: Veerendranath Jakkam Link: https://patch.msgid.link/20250424-fix_mle_defragmentation_oob_access-v1-1-84412a1743fa@quicinc.com Signed-off-by: Johannes Berg --- net/wireless/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9865f305275d..ddd3a97f6609 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2681,7 +2681,7 @@ cfg80211_defrag_mle(const struct element *mle, const u8 *ie, size_t ielen, /* Required length for first defragmentation */ buf_len = mle->datalen - 1; for_each_element(elem, mle->data + mle->datalen, - ielen - sizeof(*mle) + mle->datalen) { + ie + ielen - mle->data - mle->datalen) { if (elem->id != WLAN_EID_FRAGMENT) break; -- 2.50.1 From e12a42f64fc3d74872b349eedd47f90c6676b78a Mon Sep 17 00:00:00 2001 From: Michael-CY Lee Date: Mon, 5 May 2025 16:19:46 +0800 Subject: [PATCH 04/16] wifi: mac80211: fix the type of status_code for negotiated TID to Link Mapping The status code should be type of __le16. Fixes: 83e897a961b8 ("wifi: ieee80211: add definitions for negotiated TID to Link map") Fixes: 8f500fbc6c65 ("wifi: mac80211: process and save negotiated TID to Link mapping request") Signed-off-by: Michael-CY Lee Link: https://patch.msgid.link/20250505081946.3927214-1-michael-cy.lee@mediatek.com Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 +- net/mac80211/mlme.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 508d466de1cc..457b4fba88bd 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1526,7 +1526,7 @@ struct ieee80211_mgmt { struct { u8 action_code; u8 dialog_token; - u8 status_code; + __le16 status_code; u8 variable[]; } __packed ttlm_res; struct { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5d1f2d6d09ad..35eaf0812c5b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -7675,6 +7675,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; + u16 status_code; skb = dev_alloc_skb(local->tx_headroom + hdr_len + ttlm_max_len); if (!skb) @@ -7697,19 +7698,18 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, WARN_ON(1); fallthrough; case NEG_TTLM_RES_REJECT: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; + status_code = WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING; break; case NEG_TTLM_RES_ACCEPT: - mgmt->u.action.u.ttlm_res.status_code = WLAN_STATUS_SUCCESS; + status_code = WLAN_STATUS_SUCCESS; break; case NEG_TTLM_RES_SUGGEST_PREFERRED: - mgmt->u.action.u.ttlm_res.status_code = - WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; + status_code = WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); break; } + mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -7875,7 +7875,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (mgmt->u.action.u.ttlm_res.status_code != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } -- 2.50.1 From ebedf8b7f05b9c886d68d63025db8d1b12343157 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 May 2025 21:42:59 +0200 Subject: [PATCH 05/16] wifi: iwlwifi: add support for Killer on MTL For now, we need another entry for these devices, this will be changed completely for 6.16. Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219926 Link: https://patch.msgid.link/20250506214258.2efbdc9e9a82.I31915ec252bd1c74bd53b89a0e214e42a74b6f2e@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index debeea2b3ae5..00056e76ea3d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -588,6 +588,8 @@ VISIBLE_IF_IWLWIFI_KUNIT const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x7A70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x7AF0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), IWL_DEV_INFO(0x7AF0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), + IWL_DEV_INFO(0x7F70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name), + IWL_DEV_INFO(0x7F70, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name), IWL_DEV_INFO(0x271C, 0x0214, iwl9260_2ac_cfg, iwl9260_1_name), IWL_DEV_INFO(0x7E40, 0x1691, iwl_cfg_ma, iwl_ax411_killer_1690s_name), -- 2.50.1 From 0093cb194a7511d1e68865fa35b763c72e44c2f0 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 5 May 2025 09:19:38 -0700 Subject: [PATCH 06/16] ice: use DSN instead of PCI BDF for ice_adapter index Use Device Serial Number instead of PCI bus/device/function for the index of struct ice_adapter. Functions on the same physical device should point to the very same ice_adapter instance, but with two PFs, when at least one of them is PCI-e passed-through to a VM, it is no longer the case - PFs will get seemingly random PCI BDF values, and thus indices, what finally leds to each of them being on their own instance of ice_adapter. That causes them to don't attempt any synchronization of the PTP HW clock usage, or any other future resources. DSN works nicely in place of the index, as it is "immutable" in terms of virtualization. Fixes: 0e2bddf9e5f9 ("ice: add ice_adapter for shared data across PFs on the same NIC") Suggested-by: Jacob Keller Suggested-by: Jakub Kicinski Suggested-by: Jiri Pirko Reviewed-by: Aleksandr Loktionov Signed-off-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Rinitha S (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Reviewed-by: Jiri Pirko Link: https://patch.msgid.link/20250505161939.2083581-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_adapter.c | 47 ++++++++------------ drivers/net/ethernet/intel/ice/ice_adapter.h | 6 ++- 2 files changed, 22 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 01a08cfd0090..66e070095d1b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: Copyright Red Hat -#include #include #include #include @@ -14,32 +13,16 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -/* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ -#define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) -#define INDEX_FIELD_DEV GENMASK(31, 16) -#define INDEX_FIELD_BUS GENMASK(12, 5) -#define INDEX_FIELD_SLOT GENMASK(4, 0) - -static unsigned long ice_adapter_index(const struct pci_dev *pdev) +static unsigned long ice_adapter_index(u64 dsn) { - unsigned int domain = pci_domain_nr(pdev->bus); - - WARN_ON(domain > FIELD_MAX(INDEX_FIELD_DOMAIN)); - - switch (pdev->device) { - case ICE_DEV_ID_E825C_BACKPLANE: - case ICE_DEV_ID_E825C_QSFP: - case ICE_DEV_ID_E825C_SFP: - case ICE_DEV_ID_E825C_SGMII: - return FIELD_PREP(INDEX_FIELD_DEV, pdev->device); - default: - return FIELD_PREP(INDEX_FIELD_DOMAIN, domain) | - FIELD_PREP(INDEX_FIELD_BUS, pdev->bus->number) | - FIELD_PREP(INDEX_FIELD_SLOT, PCI_SLOT(pdev->devfn)); - } +#if BITS_PER_LONG == 64 + return dsn; +#else + return (u32)dsn ^ (u32)(dsn >> 32); +#endif } -static struct ice_adapter *ice_adapter_new(void) +static struct ice_adapter *ice_adapter_new(u64 dsn) { struct ice_adapter *adapter; @@ -47,6 +30,7 @@ static struct ice_adapter *ice_adapter_new(void) if (!adapter) return NULL; + adapter->device_serial_number = dsn; spin_lock_init(&adapter->ptp_gltsyn_time_lock); refcount_set(&adapter->refcount, 1); @@ -77,23 +61,26 @@ static void ice_adapter_free(struct ice_adapter *adapter) * Return: Pointer to ice_adapter on success. * ERR_PTR() on error. -ENOMEM is the only possible error. */ -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; int err; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); + WARN_ON_ONCE(adapter->device_serial_number != dsn); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(); + adapter = ice_adapter_new(dsn); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -110,11 +97,13 @@ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) * * Context: Process, may sleep. */ -void ice_adapter_put(const struct pci_dev *pdev) +void ice_adapter_put(struct pci_dev *pdev) { - unsigned long index = ice_adapter_index(pdev); + u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; + unsigned long index; + index = ice_adapter_index(dsn); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index e233225848b3..ac15c0d2bc1a 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -32,6 +32,7 @@ struct ice_port_list { * @refcount: Reference count. struct ice_pf objects hold the references. * @ctrl_pf: Control PF of the adapter * @ports: Ports list + * @device_serial_number: DSN cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -40,9 +41,10 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; + u64 device_serial_number; }; -struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev); -void ice_adapter_put(const struct pci_dev *pdev); +struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); +void ice_adapter_put(struct pci_dev *pdev); #endif /* _ICE_ADAPTER_H */ -- 2.50.1 From 08e9f2d584c4732180edee4cb2dbfa7586d7d5a3 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Mon, 5 May 2025 22:47:13 +0300 Subject: [PATCH 07/16] net: Lock netdevices during dev_shutdown __qdisc_destroy() calls into various qdiscs .destroy() op, which in turn can call .ndo_setup_tc(), which requires the netdev instance lock. This commit extends the critical section in unregister_netdevice_many_notify() to cover dev_shutdown() (and dev_tcx_uninstall() as a side-effect) and acquires the netdev instance lock in __dev_change_net_namespace() for the other dev_shutdown() call. This should now guarantee that for all qdisc ops, the netdev instance lock is held during .ndo_setup_tc(). Fixes: a0527ee2df3f ("net: hold netdev instance lock during qdisc ndo_setup_tc") Signed-off-by: Cosmin Ratiu Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250505194713.1723399-1-cratiu@nvidia.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 1be7cb73a602..92e004c354ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11966,9 +11966,9 @@ void unregister_netdevice_many_notify(struct list_head *head, struct sk_buff *skb = NULL; /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); dev_tcx_uninstall(dev); - netdev_lock_ops(dev); dev_xdp_uninstall(dev); dev_memory_provider_uninstall(dev); netdev_unlock_ops(dev); @@ -12161,7 +12161,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, synchronize_net(); /* Shutdown queueing discipline. */ + netdev_lock_ops(dev); dev_shutdown(dev); + netdev_unlock_ops(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. -- 2.50.1 From 78cd408356fe3edbac66598772fd347bf3e32c1f Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 5 May 2025 18:19:19 -0700 Subject: [PATCH 08/16] net: add missing instance lock to dev_set_promiscuity Accidentally spotted while trying to understand what else needs to be renamed to netif_ prefix. Most of the calls to dev_set_promiscuity are adjacent to dev_set_allmulti or dev_disable_lro so it should be safe to add the lock. Note that new netif_set_promiscuity is currently unused, the locked paths call __dev_set_promiscuity directly. Fixes: ad7c7b2172c3 ("net: hold netdev instance lock during sysfs operations") Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20250506011919.2882313-1-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 14 +------------- net/core/dev_api.c | 23 +++++++++++++++++++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d11d013cabe..7ea022750e4e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4972,6 +4972,7 @@ static inline void __dev_mc_unsync(struct net_device *dev, /* Functions used for secondary unicast and multicast support */ void dev_set_rx_mode(struct net_device *dev); +int netif_set_promiscuity(struct net_device *dev, int inc); int dev_set_promiscuity(struct net_device *dev, int inc); int netif_set_allmulti(struct net_device *dev, int inc, bool notify); int dev_set_allmulti(struct net_device *dev, int inc); diff --git a/net/core/dev.c b/net/core/dev.c index 92e004c354ea..11da1e272ec2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9193,18 +9193,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) return 0; } -/** - * dev_set_promiscuity - update promiscuity count on a device - * @dev: device - * @inc: modifier - * - * Add or remove promiscuity from a device. While the count in the device - * remains above zero the interface remains promiscuous. Once it hits zero - * the device reverts back to normal filtering operation. A negative inc - * value is used to drop promiscuity on the device. - * Return 0 if successful or a negative errno code on error. - */ -int dev_set_promiscuity(struct net_device *dev, int inc) +int netif_set_promiscuity(struct net_device *dev, int inc) { unsigned int old_flags = dev->flags; int err; @@ -9216,7 +9205,6 @@ int dev_set_promiscuity(struct net_device *dev, int inc) dev_set_rx_mode(dev); return err; } -EXPORT_SYMBOL(dev_set_promiscuity); int netif_set_allmulti(struct net_device *dev, int inc, bool notify) { diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 90898cd540ce..f9a160ab596f 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -267,6 +267,29 @@ void dev_disable_lro(struct net_device *dev) } EXPORT_SYMBOL(dev_disable_lro); +/** + * dev_set_promiscuity() - update promiscuity count on a device + * @dev: device + * @inc: modifier + * + * Add or remove promiscuity from a device. While the count in the device + * remains above zero the interface remains promiscuous. Once it hits zero + * the device reverts back to normal filtering operation. A negative inc + * value is used to drop promiscuity on the device. + * Return 0 if successful or a negative errno code on error. + */ +int dev_set_promiscuity(struct net_device *dev, int inc) +{ + int ret; + + netdev_lock_ops(dev); + ret = netif_set_promiscuity(dev, inc); + netdev_unlock_ops(dev); + + return ret; +} +EXPORT_SYMBOL(dev_set_promiscuity); + /** * dev_set_allmulti() - update allmulti count on a device * @dev: device -- 2.50.1 From e34090d7214e0516eb8722aee295cb2507317c07 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sat, 3 May 2025 01:01:18 +0300 Subject: [PATCH 09/16] ipvs: fix uninit-value for saddr in do_output_route4 syzbot reports for uninit-value for the saddr argument [1]. commit 4754957f04f5 ("ipvs: do not use random local source address for tunnels") already implies that the input value of saddr should be ignored but the code is still reading it which can prevent to connect the route. Fix it by changing the argument to ret_saddr. [1] BUG: KMSAN: uninit-value in do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 do_output_route4+0x42c/0x4d0 net/netfilter/ipvs/ip_vs_xmit.c:147 __ip_vs_get_out_rt+0x403/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:330 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e Uninit was created at: slab_post_alloc_hook mm/slub.c:4167 [inline] slab_alloc_node mm/slub.c:4210 [inline] __kmalloc_cache_noprof+0x8fa/0xe00 mm/slub.c:4367 kmalloc_noprof include/linux/slab.h:905 [inline] ip_vs_dest_dst_alloc net/netfilter/ipvs/ip_vs_xmit.c:61 [inline] __ip_vs_get_out_rt+0x35d/0x21d0 net/netfilter/ipvs/ip_vs_xmit.c:323 ip_vs_tunnel_xmit+0x205/0x2380 net/netfilter/ipvs/ip_vs_xmit.c:1136 ip_vs_in_hook+0x1aa5/0x35b0 net/netfilter/ipvs/ip_vs_core.c:2063 nf_hook_entry_hookfn include/linux/netfilter.h:154 [inline] nf_hook_slow+0xf7/0x400 net/netfilter/core.c:626 nf_hook include/linux/netfilter.h:269 [inline] __ip_local_out+0x758/0x7e0 net/ipv4/ip_output.c:118 ip_local_out net/ipv4/ip_output.c:127 [inline] ip_send_skb+0x6a/0x3c0 net/ipv4/ip_output.c:1501 udp_send_skb+0xfda/0x1b70 net/ipv4/udp.c:1195 udp_sendmsg+0x2fe3/0x33c0 net/ipv4/udp.c:1483 inet_sendmsg+0x1fc/0x280 net/ipv4/af_inet.c:851 sock_sendmsg_nosec net/socket.c:712 [inline] __sock_sendmsg+0x267/0x380 net/socket.c:727 ____sys_sendmsg+0x91b/0xda0 net/socket.c:2566 ___sys_sendmsg+0x28d/0x3c0 net/socket.c:2620 __sys_sendmmsg+0x41d/0x880 net/socket.c:2702 __compat_sys_sendmmsg net/compat.c:360 [inline] __do_compat_sys_sendmmsg net/compat.c:367 [inline] __se_compat_sys_sendmmsg net/compat.c:364 [inline] __ia32_compat_sys_sendmmsg+0xc8/0x140 net/compat.c:364 ia32_sys_call+0x3ffa/0x41f0 arch/x86/include/generated/asm/syscalls_32.h:346 do_syscall_32_irqs_on arch/x86/entry/syscall_32.c:83 [inline] __do_fast_syscall_32+0xb0/0x110 arch/x86/entry/syscall_32.c:306 do_fast_syscall_32+0x38/0x80 arch/x86/entry/syscall_32.c:331 do_SYSENTER_32+0x1f/0x30 arch/x86/entry/syscall_32.c:369 entry_SYSENTER_compat_after_hwframe+0x84/0x8e CPU: 0 UID: 0 PID: 22408 Comm: syz.4.5165 Not tainted 6.15.0-rc3-syzkaller-00019-gbc3372351d0c #0 PREEMPT(undef) Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Reported-by: syzbot+04b9a82855c8aed20860@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/68138dfa.050a0220.14dd7d.0017.GAE@google.com/ Fixes: 4754957f04f5 ("ipvs: do not use random local source address for tunnels") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_xmit.c | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3313bceb6cc9..014f07740369 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -119,13 +119,12 @@ __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) return false; } -/* Get route to daddr, update *saddr, optionally bind route to saddr */ +/* Get route to daddr, optionally bind route to saddr */ static struct rtable *do_output_route4(struct net *net, __be32 daddr, - int rt_mode, __be32 *saddr) + int rt_mode, __be32 *ret_saddr) { struct flowi4 fl4; struct rtable *rt; - bool loop = false; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = daddr; @@ -135,23 +134,17 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, retry: rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { - /* Invalid saddr ? */ - if (PTR_ERR(rt) == -EINVAL && *saddr && - rt_mode & IP_VS_RT_MODE_CONNECT && !loop) { - *saddr = 0; - flowi4_update_output(&fl4, 0, daddr, 0); - goto retry; - } IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; - } else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { + } + if (rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) { ip_rt_put(rt); - *saddr = fl4.saddr; flowi4_update_output(&fl4, 0, daddr, fl4.saddr); - loop = true; + rt_mode = 0; goto retry; } - *saddr = fl4.saddr; + if (ret_saddr) + *ret_saddr = fl4.saddr; return rt; } @@ -344,19 +337,15 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, if (ret_saddr) *ret_saddr = dest_dst->dst_saddr.ip; } else { - __be32 saddr = htonl(INADDR_ANY); - noref = 0; /* For such unconfigured boxes avoid many route lookups * for performance reasons because we do not remember saddr */ rt_mode &= ~IP_VS_RT_MODE_CONNECT; - rt = do_output_route4(net, daddr, rt_mode, &saddr); + rt = do_output_route4(net, daddr, rt_mode, ret_saddr); if (!rt) goto err_unreach; - if (ret_saddr) - *ret_saddr = saddr; } local = (rt->rt_flags & RTCF_LOCAL) ? 1 : 0; -- 2.50.1 From 8478a729c0462273188263136880480729e9efca Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Wed, 7 May 2025 17:01:59 +0200 Subject: [PATCH 10/16] netfilter: ipset: fix region locking in hash types Region locking introduced in v5.6-rc4 contained three macros to handle the region locks: ahash_bucket_start(), ahash_bucket_end() which gave back the start and end hash bucket values belonging to a given region lock and ahash_region() which should give back the region lock belonging to a given hash bucket. The latter was incorrect which can lead to a race condition between the garbage collector and adding new elements when a hash type of set is defined with timeouts. Fixes: f66ee0410b1c ("netfilter: ipset: Fix "INFO: rcu detected stall in hash_xxx" reports") Reported-by: Kota Toda Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipset/ip_set_hash_gen.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index cf3ce72c3de6..5251524b96af 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -64,7 +64,7 @@ struct hbucket { #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ - ((n) % ahash_numof_locks(htable_bits)) + ((n) / jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) -- 2.50.1 From 6beb6835c1fbb3f676aebb51a5fee6b77fed9308 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 6 May 2025 16:28:54 +0200 Subject: [PATCH 11/16] openvswitch: Fix unsafe attribute parsing in output_userspace() This patch replaces the manual Netlink attribute iteration in output_userspace() with nla_for_each_nested(), which ensures that only well-formed attributes are processed. Fixes: ccb1352e76cf ("net: Add Open vSwitch kernel components.") Signed-off-by: Eelco Chaudron Acked-by: Ilya Maximets Acked-by: Aaron Conole Link: https://patch.msgid.link/0bd65949df61591d9171c0dc13e42cea8941da10.1746541734.git.echaudro@redhat.com Signed-off-by: Jakub Kicinski --- net/openvswitch/actions.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 61fea7baae5d..2f22ca59586f 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -975,8 +975,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.mru = OVS_CB(skb)->mru; - for (a = nla_data(attr), rem = nla_len(attr); rem > 0; - a = nla_next(a, &rem)) { + nla_for_each_nested(a, attr, rem) { switch (nla_type(a)) { case OVS_USERSPACE_ATTR_USERDATA: upcall.userdata = a; -- 2.50.1 From 4a7843cc8a41b9612becccc07715ed017770eb89 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Tue, 6 May 2025 18:56:47 +0200 Subject: [PATCH 12/16] net: airoha: Add missing field to ppe_mbox_data struct The official Airoha EN7581 firmware requires adding max_packet field in ppe_mbox_data struct while the unofficial one used to develop the Airoha EN7581 flowtable support does not require this field. This patch does not introduce any real backwards compatible issue since EN7581 fw is not publicly available in linux-firmware or other repositories (e.g. OpenWrt) yet and the official fw version will use this new layout. For this reason this change needs to be backported. Moreover, make explicit the padding added by the compiler introducing the rsv array in init_info struct. At the same time use u32 instead of int for init_info and set_info struct definitions in ppe_mbox_data struct. Fixes: 23290c7bc190d ("net: airoha: Introduce Airoha NPU support") Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20250506-airoha-en7581-fix-ppe_mbox_data-v5-1-29cabed6864d@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/airoha/airoha_npu.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index 7a5710f9ccf6..ead0625e781f 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -104,12 +104,14 @@ struct ppe_mbox_data { u8 xpon_hal_api; u8 wan_xsi; u8 ct_joyme4; - int ppe_type; - int wan_mode; - int wan_sel; + u8 max_packet; + u8 rsv[3]; + u32 ppe_type; + u32 wan_mode; + u32 wan_sel; } init_info; struct { - int func_id; + u32 func_id; u32 size; u32 data; } set_info; -- 2.50.1 From c4327229948879814229b46aa26a750718888503 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:04 +0200 Subject: [PATCH 13/16] bpf: Scrub packet on bpf_redirect_peer When bpf_redirect_peer is used to redirect packets to a device in another network namespace, the skb isn't scrubbed. That can lead skb information from one namespace to be "misused" in another namespace. As one example, this is causing Cilium to drop traffic when using bpf_redirect_peer to redirect packets that just went through IPsec decryption to a container namespace. The following pwru trace shows (1) the packet path from the host's XFRM layer to the container's XFRM layer where it's dropped and (2) the number of active skb extensions at each function. NETNS MARK IFACE TUPLE FUNC 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 xfrm4_rcv_cb .active_extensions = (__u8)2, 4026533547 d00 eth0 10.244.3.124:35473->10.244.2.158:53 gro_cells_receive .active_extensions = (__u8)2, [...] 4026533547 0 eth0 10.244.3.124:35473->10.244.2.158:53 skb_do_redirect .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 ip_rcv_core .active_extensions = (__u8)2, [...] 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 udp_queue_rcv_one_skb .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_policy_check .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 __xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 security_xfrm_decode_session .active_extensions = (__u8)2, 4026534999 0 eth0 10.244.3.124:35473->10.244.2.158:53 kfree_skb_reason(SKB_DROP_REASON_XFRM_POLICY) .active_extensions = (__u8)2, In this case, there are no XFRM policies in the container's network namespace so the drop is unexpected. When we decrypt the IPsec packet, the XFRM state used for decryption is set in the skb extensions. This information is preserved across the netns switch. When we reach the XFRM policy check in the container's netns, __xfrm_policy_check drops the packet with LINUX_MIB_XFRMINNOPOLS because a (container-side) XFRM policy can't be found that matches the (host-side) XFRM state used for decryption. This patch fixes this by scrubbing the packet when using bpf_redirect_peer, as is done on typical netns switches via veth devices except skb->mark and skb->tstamp are not zeroed. Fixes: 9aa1206e8f482 ("bpf: Add redirect_peer helper") Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/1728ead5e0fe45e7a6542c36bd4e3ca07a73b7d6.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index 79cab4d78dc3..577a4504e26f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2509,6 +2509,7 @@ int skb_do_redirect(struct sk_buff *skb) goto out_drop; skb->dev = dev; dev_sw_netstats_rx_add(dev, skb->len); + skb_scrub_packet(skb, false); return -EAGAIN; } return flags & BPF_F_NEIGH ? -- 2.50.1 From f5c79ffdc250bc8c90fd4fdf1e5d7ac4647912d5 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Mon, 5 May 2025 21:58:39 +0200 Subject: [PATCH 14/16] bpf: Clarify handling of mark and tstamp by redirect_peer When switching network namespaces with the bpf_redirect_peer helper, the skb->mark and skb->tstamp fields are not zeroed out like they can be on a typical netns switch. This patch clarifies that in the helper description. Signed-off-by: Paul Chaignon Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Link: https://patch.msgid.link/ccc86af26d43c5c0b776bcba2601b7479c0d46d0.1746460653.git.paul.chaignon@gmail.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/bpf.h | 3 +++ tools/include/uapi/linux/bpf.h | 3 +++ 2 files changed, 6 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The -- 2.50.1 From e5641daa0ea1932e2b0fa7f27843e712244f6538 Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:44 +0530 Subject: [PATCH 15/16] net: ti: icssg-prueth: Set XDP feature flags for ndev xdp_features demonstrates what all XDP capabilities are supported on a given network device. The driver needs to set these xdp_features flag to let the network stack know what XDP features a given driver is supporting. These flags need to be set for a given ndev irrespective of any XDP program being loaded or not. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-2-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index 443f90fa6557..ee35fecf61e7 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1109,11 +1109,6 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame static int emac_xdp_setup(struct prueth_emac *emac, struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->prog; - xdp_features_t val; - - val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | - NETDEV_XDP_ACT_NDO_XMIT; - xdp_set_features_flag(emac->ndev, val); if (!emac->xdpi.prog && !prog) return 0; @@ -1291,6 +1286,10 @@ static int prueth_netdev_init(struct prueth *prueth, ndev->hw_features = NETIF_F_SG; ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_FILTER; ndev->hw_features |= NETIF_PRUETH_HSR_OFFLOAD_FEATURES; + xdp_set_features_flag(ndev, + NETDEV_XDP_ACT_BASIC | + NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_NDO_XMIT); netif_napi_add(ndev, &emac->napi_rx, icssg_napi_rx_poll); hrtimer_setup(&emac->rx_hrtimer, &emac_rx_timer_callback, CLOCK_MONOTONIC, -- 2.50.1 From 8b3fae3e2376b70b7a76005263bc34d034b9c7bf Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Tue, 6 May 2025 16:35:45 +0530 Subject: [PATCH 16/16] net: ti: icssg-prueth: Fix kernel panic during concurrent Tx queue access Add __netif_tx_lock() to ensure that only one packet is being transmitted at a time to avoid race conditions in the netif_txq struct and prevent packet data corruption. Failing to do so causes kernel panic with the following error: [ 2184.746764] ------------[ cut here ]------------ [ 2184.751412] kernel BUG at lib/dynamic_queue_limits.c:99! [ 2184.756728] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP logs: https://gist.github.com/MeghanaMalladiTI/9c7aa5fc3b7fb03f87c74aad487956e9 The lock is acquired before calling emac_xmit_xdp_frame() and released after the call returns. This ensures that the TX queue is protected from concurrent access during the transmission of XDP frames. Fixes: 62aa3246f462 ("net: ti: icssg-prueth: Add XDP support") Signed-off-by: Meghana Malladi Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250506110546.4065715-3-m-malladi@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_common.c | 7 ++++++- drivers/net/ethernet/ti/icssg/icssg_prueth.c | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index b4be76e13a2f..38a80e004d1c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -650,6 +650,8 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, struct page *page, u32 *len) { struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct bpf_prog *xdp_prog; struct xdp_frame *xdpf; u32 pkt_len = *len; @@ -669,8 +671,11 @@ static u32 emac_run_xdp(struct prueth_emac *emac, struct xdp_buff *xdp, goto drop; } - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); + __netif_tx_lock(netif_txq, cpu); result = emac_xmit_xdp_frame(emac, xdpf, page, q_idx); + __netif_tx_unlock(netif_txq); if (result == ICSSG_XDP_CONSUMED) { ndev->stats.tx_dropped++; goto drop; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index ee35fecf61e7..86fc1278127c 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -1075,17 +1075,21 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame { struct prueth_emac *emac = netdev_priv(dev); struct net_device *ndev = emac->ndev; + struct netdev_queue *netif_txq; + int cpu = smp_processor_id(); struct xdp_frame *xdpf; unsigned int q_idx; int nxmit = 0; u32 err; int i; - q_idx = smp_processor_id() % emac->tx_ch_num; + q_idx = cpu % emac->tx_ch_num; + netif_txq = netdev_get_tx_queue(ndev, q_idx); if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; + __netif_tx_lock(netif_txq, cpu); for (i = 0; i < n; i++) { xdpf = frames[i]; err = emac_xmit_xdp_frame(emac, xdpf, NULL, q_idx); @@ -1095,6 +1099,7 @@ static int emac_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frame } nxmit++; } + __netif_tx_unlock(netif_txq); return nxmit; } -- 2.50.1