From 4cde0e4224ce70bb6e91930a8850b59194151838 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Mon, 12 May 2025 14:44:22 +0300 Subject: [PATCH 01/16] net: cpsw: isolate cpsw_ndo_ioctl() to just the old driver cpsw->slaves[slave_no].phy should be equal to netdev->phydev, because it is assigned from phy_attach_direct(). The latter is indirectly called from the two identically named cpsw_slave_open() functions, one in cpsw.c and another in cpsw_new.c. Thus, the driver should not need custom logic to find the PHY, the core can find it, and phy_do_ioctl_running() achieves exactly that. However, that is only the case for cpsw_new and for the cpsw driver in dual EMAC mode. This is explained in more detail in the previous commit. Thus, allow the simpler core logic to execute for cpsw_new, and move cpsw_ndo_ioctl() to cpsw.c. Signed-off-by: Vladimir Oltean Link: https://patch.msgid.link/20250512114422.4176010-2-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 21 +++++++++++++++++++++ drivers/net/ethernet/ti/cpsw_new.c | 2 +- drivers/net/ethernet/ti/cpsw_priv.c | 17 ----------------- drivers/net/ethernet/ti/cpsw_priv.h | 1 - 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b71352689768..54c24cd3d3be 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1156,6 +1156,27 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev) } #endif +/* We need a custom implementation of phy_do_ioctl_running() because in switch + * mode, dev->phydev may be different than the phy of the active_slave. We need + * to operate on the locally saved phy instead. + */ +static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) +{ + struct cpsw_priv *priv = netdev_priv(dev); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); + struct phy_device *phy; + + if (!netif_running(dev)) + return -EINVAL; + + phy = cpsw->slaves[slave_no].phy; + if (phy) + return phy_mii_ioctl(phy, req, cmd); + + return -EOPNOTSUPP; +} + static const struct net_device_ops cpsw_netdev_ops = { .ndo_open = cpsw_ndo_open, .ndo_stop = cpsw_ndo_stop, diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index f5b74d066f0e..8b9e2078c602 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -1132,7 +1132,7 @@ static const struct net_device_ops cpsw_netdev_ops = { .ndo_stop = cpsw_ndo_stop, .ndo_start_xmit = cpsw_ndo_start_xmit, .ndo_set_mac_address = cpsw_ndo_set_mac_address, - .ndo_eth_ioctl = cpsw_ndo_ioctl, + .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = cpsw_ndo_tx_timeout, .ndo_set_rx_mode = cpsw_ndo_set_rx_mode, diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 8e17da12d8f5..bc4fdf17a99e 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -710,23 +710,6 @@ int cpsw_hwtstamp_set(struct net_device *dev, } #endif /*CONFIG_TI_CPTS*/ -int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) -{ - struct cpsw_priv *priv = netdev_priv(dev); - struct cpsw_common *cpsw = priv->cpsw; - int slave_no = cpsw_slave_index(cpsw, priv); - struct phy_device *phy; - - if (!netif_running(dev)) - return -EINVAL; - - phy = cpsw->slaves[slave_no].phy; - if (phy) - return phy_mii_ioctl(phy, req, cmd); - - return -EOPNOTSUPP; -} - int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate) { struct cpsw_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index b23f98032669..91add8925e23 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -461,7 +461,6 @@ void soft_reset(const char *module, void __iomem *reg); void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv); void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue); int cpsw_need_resplit(struct cpsw_common *cpsw); -int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd); int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate); int cpsw_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type, void *type_data); -- 2.51.0 From 10465365f3b094ba9a9795f212d13dee594bcfe7 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Mon, 12 May 2025 14:03:42 +0200 Subject: [PATCH 02/16] net: phy: marvell-88q2xxx: Enable temperature measurement in probe again Enabling of the temperature sensor was moved from mv88q2xxx_hwmon_probe to mv88q222x_config_init with the consequence that the sensor is only usable when the PHY is configured. Enable the sensor in mv88q2xxx_hwmon_probe as well to fix this. Signed-off-by: Dimitri Fedrau Link: https://patch.msgid.link/20250512-marvell-88q2xxx-hwmon-enable-at-probe-v4-1-9256a5c8f603@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/phy/marvell-88q2xxx.c | 103 +++++++++++++++++------------- 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/drivers/net/phy/marvell-88q2xxx.c b/drivers/net/phy/marvell-88q2xxx.c index 5c687164b8e0..f3d83b04c953 100644 --- a/drivers/net/phy/marvell-88q2xxx.c +++ b/drivers/net/phy/marvell-88q2xxx.c @@ -119,7 +119,6 @@ #define MV88Q2XXX_LED_INDEX_GPIO 1 struct mv88q2xxx_priv { - bool enable_temp; bool enable_led0; }; @@ -482,49 +481,6 @@ static int mv88q2xxx_config_aneg(struct phy_device *phydev) return phydev->drv->soft_reset(phydev); } -static int mv88q2xxx_config_init(struct phy_device *phydev) -{ - struct mv88q2xxx_priv *priv = phydev->priv; - int ret; - - /* The 88Q2XXX PHYs do have the extended ability register available, but - * register MDIO_PMA_EXTABLE where they should signalize it does not - * work according to specification. Therefore, we force it here. - */ - phydev->pma_extable = MDIO_PMA_EXTABLE_BT1; - - /* Configure interrupt with default settings, output is driven low for - * active interrupt and high for inactive. - */ - if (phy_interrupt_is_valid(phydev)) { - ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL, - MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); - if (ret < 0) - return ret; - } - - /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ - if (priv->enable_led0) { - ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_RESET_CTRL, - MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); - if (ret < 0) - return ret; - } - - /* Enable temperature sense */ - if (priv->enable_temp) { - ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, - MDIO_MMD_PCS_MV_TEMP_SENSOR2, - MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); - if (ret < 0) - return ret; - } - - return 0; -} - static int mv88q2xxx_get_sqi(struct phy_device *phydev) { int ret; @@ -667,6 +623,12 @@ static int mv88q2xxx_resume(struct phy_device *phydev) } #if IS_ENABLED(CONFIG_HWMON) +static int mv88q2xxx_enable_temp_sense(struct phy_device *phydev) +{ + return phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_MMD_PCS_MV_TEMP_SENSOR2, + MDIO_MMD_PCS_MV_TEMP_SENSOR2_DIS_MASK, 0); +} + static const struct hwmon_channel_info * const mv88q2xxx_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_ALARM), NULL @@ -762,11 +724,13 @@ static const struct hwmon_chip_info mv88q2xxx_hwmon_chip_info = { static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { - struct mv88q2xxx_priv *priv = phydev->priv; struct device *dev = &phydev->mdio.dev; struct device *hwmon; + int ret; - priv->enable_temp = true; + ret = mv88q2xxx_enable_temp_sense(phydev); + if (ret < 0) + return ret; hwmon = devm_hwmon_device_register_with_info(dev, NULL, phydev, &mv88q2xxx_hwmon_chip_info, @@ -776,6 +740,11 @@ static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) } #else +static int mv88q2xxx_enable_temp_sense(struct phy_device *phydev) +{ + return 0; +} + static int mv88q2xxx_hwmon_probe(struct phy_device *phydev) { return 0; @@ -843,6 +812,48 @@ static int mv88q2xxx_probe(struct phy_device *phydev) return mv88q2xxx_hwmon_probe(phydev); } +static int mv88q2xxx_config_init(struct phy_device *phydev) +{ + struct mv88q2xxx_priv *priv = phydev->priv; + int ret; + + /* The 88Q2XXX PHYs do have the extended ability register available, but + * register MDIO_PMA_EXTABLE where they should signalize it does not + * work according to specification. Therefore, we force it here. + */ + phydev->pma_extable = MDIO_PMA_EXTABLE_BT1; + + /* Configure interrupt with default settings, output is driven low for + * active interrupt and high for inactive. + */ + if (phy_interrupt_is_valid(phydev)) { + ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL, + MDIO_MMD_PCS_MV_GPIO_INT_CTRL_TRI_DIS); + if (ret < 0) + return ret; + } + + /* Enable LED function and disable TX disable feature on LED/TX_ENABLE */ + if (priv->enable_led0) { + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_PCS, + MDIO_MMD_PCS_MV_RESET_CTRL, + MDIO_MMD_PCS_MV_RESET_CTRL_TX_DISABLE); + if (ret < 0) + return ret; + } + + /* Enable temperature sense again. There might have been a hard reset + * of the PHY and in this case the register content is restored to + * defaults and we need to enable it again. + */ + ret = mv88q2xxx_enable_temp_sense(phydev); + if (ret < 0) + return ret; + + return 0; +} + static int mv88q2110_config_init(struct phy_device *phydev) { int ret; -- 2.51.0 From e505e140738005715b1b20eb848711b21a1b3c76 Mon Sep 17 00:00:00 2001 From: Lee Trager Date: Mon, 12 May 2025 11:53:57 -0700 Subject: [PATCH 03/16] pldmfw: Don't require send_package_data or send_component_table to be defined Not all drivers require send_package_data or send_component_table when updating firmware. Instead of forcing drivers to implement a stub allow these functions to go undefined. Signed-off-by: Lee Trager Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Jacob Keller Link: https://patch.msgid.link/20250512190109.2475614-2-lee@trager.us Signed-off-by: Paolo Abeni --- lib/pldmfw/pldmfw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/pldmfw/pldmfw.c b/lib/pldmfw/pldmfw.c index 6264e2013f25..b45ceb725780 100644 --- a/lib/pldmfw/pldmfw.c +++ b/lib/pldmfw/pldmfw.c @@ -728,6 +728,9 @@ pldm_send_package_data(struct pldmfw_priv *data) struct pldmfw_record *record = data->matching_record; const struct pldmfw_ops *ops = data->context->ops; + if (!ops->send_package_data) + return 0; + return ops->send_package_data(data->context, record->package_data, record->package_data_len); } @@ -755,6 +758,9 @@ pldm_send_component_tables(struct pldmfw_priv *data) if (!test_bit(index, bitmap)) continue; + if (!data->context->ops->send_component_table) + continue; + /* determine whether this is the start, middle, end, or both * the start and end of the component tables */ -- 2.51.0 From bb7e124e30fd44b29f60086c77eb6242e3b0158f Mon Sep 17 00:00:00 2001 From: Lee Trager Date: Mon, 12 May 2025 11:53:58 -0700 Subject: [PATCH 04/16] eth: fbnic: Accept minimum anti-rollback version from firmware fbnic supports applying firmware which may not be rolled back. This is implemented in firmware however it is useful for the driver to know the minimum supported firmware version. This will enable the driver validate new firmware before it is sent to the NIC. If it is too old the driver can provide a clear message that the version is too old. Signed-off-by: Lee Trager Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250512190109.2475614-3-lee@trager.us Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 4 ++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 2 ++ 2 files changed, 6 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 3d9636a6c968..e4f72fb730a6 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -464,6 +464,7 @@ static const struct fbnic_tlv_index fbnic_fw_cap_resp_index[] = { FBNIC_TLV_ATTR_U32(FBNIC_FW_CAP_RESP_UEFI_VERSION), FBNIC_TLV_ATTR_STRING(FBNIC_FW_CAP_RESP_UEFI_COMMIT_STR, FBNIC_FW_CAP_RESP_COMMIT_MAX_SIZE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION), FBNIC_TLV_ATTR_LAST }; @@ -586,6 +587,9 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results) if (results[FBNIC_FW_CAP_RESP_BMC_ALL_MULTI] || !bmc_present) fbd->fw_cap.all_multi = all_multi; + fbd->fw_cap.anti_rollback_version = + fta_get_uint(results, FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION); + return 0; } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index a3618e7826c2..692dfd8746e7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -42,6 +42,7 @@ struct fbnic_fw_cap { u8 all_multi : 1; u8 link_speed; u8 link_fec; + u32 anti_rollback_version; }; struct fbnic_fw_completion { @@ -122,6 +123,7 @@ enum { FBNIC_FW_CAP_RESP_STORED_CMRT_COMMIT_STR = 0x10, FBNIC_FW_CAP_RESP_UEFI_VERSION = 0x11, FBNIC_FW_CAP_RESP_UEFI_COMMIT_STR = 0x12, + FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION = 0x15, FBNIC_FW_CAP_RESP_MSG_MAX }; -- 2.51.0 From cc083264ad756c2ff34a97da6fce94a9568dffa1 Mon Sep 17 00:00:00 2001 From: Lee Trager Date: Mon, 12 May 2025 11:53:59 -0700 Subject: [PATCH 05/16] eth: fbnic: Add support for multiple concurrent completion messages Extend fbnic mailbox to support multiple concurrent completion messages at once. This enables fbnic to support running multiple operations at once which depend on a response from firmware via the mailbox. Signed-off-by: Lee Trager Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Jacob Keller Link: https://patch.msgid.link/20250512190109.2475614-4-lee@trager.us Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic.h | 3 +- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 99 +++++++++++++++++---- drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 5 +- drivers/net/ethernet/meta/fbnic/fbnic_mac.c | 2 +- 4 files changed, 87 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index ad01ed05d78b..65815d4f379e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -19,6 +19,7 @@ struct fbnic_napi_vector; #define FBNIC_MAX_NAPI_VECTORS 128u +#define FBNIC_MBX_CMPL_SLOTS 4 struct fbnic_dev { struct device *dev; @@ -42,7 +43,7 @@ struct fbnic_dev { struct fbnic_fw_mbx mbx[FBNIC_IPC_MBX_INDICES]; struct fbnic_fw_cap fw_cap; - struct fbnic_fw_completion *cmpl_data; + struct fbnic_fw_completion *cmpl_data[FBNIC_MBX_CMPL_SLOTS]; /* Lock protecting Tx Mailbox queue to prevent possible races */ spinlock_t fw_tx_lock; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index e4f72fb730a6..6fcba4e8c21e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -237,6 +237,44 @@ static int fbnic_mbx_map_tlv_msg(struct fbnic_dev *fbd, return err; } +static int fbnic_mbx_set_cmpl_slot(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; + int free = -EXFULL; + int i; + + if (!tx_mbx->ready) + return -ENODEV; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (!fbd->cmpl_data[i]) + free = i; + else if (fbd->cmpl_data[i]->msg_type == cmpl_data->msg_type) + return -EEXIST; + } + + if (free == -EXFULL) + return -EXFULL; + + fbd->cmpl_data[free] = cmpl_data; + + return 0; +} + +static void fbnic_mbx_clear_cmpl_slot(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + int i; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (fbd->cmpl_data[i] == cmpl_data) { + fbd->cmpl_data[i] = NULL; + break; + } + } +} + static void fbnic_mbx_process_tx_msgs(struct fbnic_dev *fbd) { struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; @@ -258,6 +296,19 @@ static void fbnic_mbx_process_tx_msgs(struct fbnic_dev *fbd) tx_mbx->head = head; } +int fbnic_mbx_set_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data) +{ + unsigned long flags; + int err; + + spin_lock_irqsave(&fbd->fw_tx_lock, flags); + err = fbnic_mbx_set_cmpl_slot(fbd, cmpl_data); + spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); + + return err; +} + static int fbnic_mbx_map_req_w_cmpl(struct fbnic_dev *fbd, struct fbnic_tlv_msg *msg, struct fbnic_fw_completion *cmpl_data) @@ -266,23 +317,20 @@ static int fbnic_mbx_map_req_w_cmpl(struct fbnic_dev *fbd, int err; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - - /* If we are already waiting on a completion then abort */ - if (cmpl_data && fbd->cmpl_data) { - err = -EBUSY; - goto unlock_mbx; + if (cmpl_data) { + err = fbnic_mbx_set_cmpl_slot(fbd, cmpl_data); + if (err) + goto unlock_mbx; } - /* Record completion location and submit request */ - if (cmpl_data) - fbd->cmpl_data = cmpl_data; - err = fbnic_mbx_map_msg(fbd, FBNIC_IPC_MBX_TX_IDX, msg, le16_to_cpu(msg->hdr.len) * sizeof(u32), 1); - /* If msg failed then clear completion data for next caller */ + /* If we successfully reserved a completion and msg failed + * then clear completion data for next caller + */ if (err && cmpl_data) - fbd->cmpl_data = NULL; + fbnic_mbx_clear_cmpl_slot(fbd, cmpl_data); unlock_mbx: spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); @@ -304,12 +352,18 @@ fbnic_fw_get_cmpl_by_type(struct fbnic_dev *fbd, u32 msg_type) { struct fbnic_fw_completion *cmpl_data = NULL; unsigned long flags; + int i; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - if (fbd->cmpl_data && fbd->cmpl_data->msg_type == msg_type) { - cmpl_data = fbd->cmpl_data; - kref_get(&fbd->cmpl_data->ref_count); + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + if (fbd->cmpl_data[i] && + fbd->cmpl_data[i]->msg_type == msg_type) { + cmpl_data = fbd->cmpl_data[i]; + kref_get(&cmpl_data->ref_count); + break; + } } + spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); return cmpl_data; @@ -925,10 +979,16 @@ static void __fbnic_fw_evict_cmpl(struct fbnic_fw_completion *cmpl_data) static void fbnic_mbx_evict_all_cmpl(struct fbnic_dev *fbd) { - if (fbd->cmpl_data) { - __fbnic_fw_evict_cmpl(fbd->cmpl_data); - fbd->cmpl_data = NULL; + int i; + + for (i = 0; i < FBNIC_MBX_CMPL_SLOTS; i++) { + struct fbnic_fw_completion *cmpl_data = fbd->cmpl_data[i]; + + if (cmpl_data) + __fbnic_fw_evict_cmpl(cmpl_data); } + + memset(fbd->cmpl_data, 0, sizeof(fbd->cmpl_data)); } void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) @@ -989,12 +1049,13 @@ void fbnic_fw_init_cmpl(struct fbnic_fw_completion *fw_cmpl, kref_init(&fw_cmpl->ref_count); } -void fbnic_fw_clear_compl(struct fbnic_dev *fbd) +void fbnic_fw_clear_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *fw_cmpl) { unsigned long flags; spin_lock_irqsave(&fbd->fw_tx_lock, flags); - fbd->cmpl_data = NULL; + fbnic_mbx_clear_cmpl_slot(fbd, fw_cmpl); spin_unlock_irqrestore(&fbd->fw_tx_lock, flags); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 692dfd8746e7..39dec0792090 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -60,6 +60,8 @@ struct fbnic_fw_completion { void fbnic_mbx_init(struct fbnic_dev *fbd); void fbnic_mbx_clean(struct fbnic_dev *fbd); +int fbnic_mbx_set_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data); void fbnic_mbx_poll(struct fbnic_dev *fbd); int fbnic_mbx_poll_tx_ready(struct fbnic_dev *fbd); void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); @@ -70,7 +72,8 @@ int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); void fbnic_fw_init_cmpl(struct fbnic_fw_completion *cmpl_data, u32 msg_type); -void fbnic_fw_clear_compl(struct fbnic_dev *fbd); +void fbnic_fw_clear_cmpl(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); #define fbnic_mk_full_fw_ver_str(_rev_id, _delim, _commit, _str, _str_sz) \ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index dde4a37116e2..4ba6f8d10775 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -744,7 +744,7 @@ static int fbnic_mac_get_sensor_asic(struct fbnic_dev *fbd, int id, *val = *sensor; exit_cleanup: - fbnic_fw_clear_compl(fbd); + fbnic_fw_clear_cmpl(fbd, fw_cmpl); exit_free: fbnic_fw_put_cmpl(fw_cmpl); -- 2.51.0 From 2a4ada8a99e6ba3dd0399c36b572579b4526bac0 Mon Sep 17 00:00:00 2001 From: Lee Trager Date: Mon, 12 May 2025 11:54:00 -0700 Subject: [PATCH 06/16] eth: fbnic: Add mailbox support for PLDM updates Add three new mailbox messages to support PLDM upgrades: * FW_START_UPGRADE - Enables driver to request starting a firmware upgrade by specifying the component to be upgraded and its size. * WRITE_CHUNK - Allows firmware to request driver to send a chunk of data at the specified offset. * FINISH_UPGRADE - Allows firmware to cancel the upgrade process and return an error. Signed-off-by: Lee Trager Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250512190109.2475614-5-lee@trager.us Signed-off-by: Paolo Abeni --- drivers/net/ethernet/meta/fbnic/fbnic_fw.c | 191 +++++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 37 ++++ 2 files changed, 228 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 6fcba4e8c21e..6a803a59dc25 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -766,6 +766,188 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) dev_warn(fbd->dev, "Failed to send heartbeat message\n"); } +int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + unsigned int id, unsigned int len) +{ + struct fbnic_tlv_msg *msg; + int err; + + if (!fbnic_fw_present(fbd)) + return -ENODEV; + + if (!len) + return -EINVAL; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ); + if (!msg) + return -ENOMEM; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_START_UPGRADE_SECTION, id); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_START_UPGRADE_IMAGE_LENGTH, + len); + if (err) + goto free_message; + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_fw_start_upgrade_resp_index[] = { + FBNIC_TLV_ATTR_S32(FBNIC_FW_START_UPGRADE_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_start_upgrade_resp(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Check for errors */ + err = fta_get_sint(results, FBNIC_FW_START_UPGRADE_ERROR); + + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + +int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, + const u8 *data, u32 offset, u16 length, + int cancel_error) +{ + struct fbnic_tlv_msg *msg; + int err; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_RESP); + if (!msg) + return -ENOMEM; + + /* Report error to FW to cancel upgrade */ + if (cancel_error) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_ERROR, + cancel_error); + if (err) + goto free_message; + } + + if (data) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_OFFSET, + offset); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_WRITE_CHUNK_LENGTH, + length); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_value(msg, FBNIC_FW_WRITE_CHUNK_DATA, + data + offset, length); + if (err) + goto free_message; + } + + err = fbnic_mbx_map_tlv_msg(fbd, msg); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_fw_write_chunk_req_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_WRITE_CHUNK_OFFSET), + FBNIC_TLV_ATTR_U32(FBNIC_FW_WRITE_CHUNK_LENGTH), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_write_chunk_req(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + u32 offset; + u32 length; + + /* Verify we have a completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Pull length/offset pair and mark it as complete */ + offset = fta_get_uint(results, FBNIC_FW_WRITE_CHUNK_OFFSET); + length = fta_get_uint(results, FBNIC_FW_WRITE_CHUNK_LENGTH); + cmpl_data->u.fw_update.offset = offset; + cmpl_data->u.fw_update.length = length; + + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + +static const struct fbnic_tlv_index fbnic_fw_finish_upgrade_req_index[] = { + FBNIC_TLV_ATTR_S32(FBNIC_FW_FINISH_UPGRADE_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_fw_finish_upgrade_req(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer */ + msg_type = FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + /* Check for errors */ + err = fta_get_sint(results, FBNIC_FW_FINISH_UPGRADE_ERROR); + + /* Close out update by incrementing offset by length which should + * match the total size of the component. Set length to 0 since no + * new chunks will be requested. + */ + cmpl_data->u.fw_update.offset += cmpl_data->u.fw_update.length; + cmpl_data->u.fw_update.length = 0; + + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return 0; +} + /** * fbnic_fw_xmit_tsene_read_msg - Create and transmit a sensor read request * @fbd: FBNIC device structure @@ -850,6 +1032,15 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { fbnic_fw_parse_ownership_resp), FBNIC_TLV_PARSER(HEARTBEAT_RESP, fbnic_heartbeat_resp_index, fbnic_fw_parse_heartbeat_resp), + FBNIC_TLV_PARSER(FW_START_UPGRADE_RESP, + fbnic_fw_start_upgrade_resp_index, + fbnic_fw_parse_fw_start_upgrade_resp), + FBNIC_TLV_PARSER(FW_WRITE_CHUNK_REQ, + fbnic_fw_write_chunk_req_index, + fbnic_fw_parse_fw_write_chunk_req), + FBNIC_TLV_PARSER(FW_FINISH_UPGRADE_REQ, + fbnic_fw_finish_upgrade_req_index, + fbnic_fw_parse_fw_finish_upgrade_req), FBNIC_TLV_PARSER(TSENE_READ_RESP, fbnic_tsene_read_resp_index, fbnic_fw_parse_tsene_read_resp), diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 39dec0792090..0ab6ae3859e4 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -51,6 +51,10 @@ struct fbnic_fw_completion { struct kref ref_count; int result; union { + struct { + u32 offset; + u32 length; + } fw_update; struct { s32 millivolts; s32 millidegrees; @@ -68,6 +72,12 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); +int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + unsigned int id, unsigned int len); +int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, + const u8 *data, u32 offset, u16 length, + int cancel_error); int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); void fbnic_fw_init_cmpl(struct fbnic_fw_completion *cmpl_data, @@ -99,6 +109,12 @@ enum { FBNIC_TLV_MSG_ID_OWNERSHIP_RESP = 0x13, FBNIC_TLV_MSG_ID_HEARTBEAT_REQ = 0x14, FBNIC_TLV_MSG_ID_HEARTBEAT_RESP = 0x15, + FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ = 0x22, + FBNIC_TLV_MSG_ID_FW_START_UPGRADE_RESP = 0x23, + FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ = 0x24, + FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_RESP = 0x25, + FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_REQ = 0x28, + FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_RESP = 0x29, FBNIC_TLV_MSG_ID_TSENE_READ_REQ = 0x3C, FBNIC_TLV_MSG_ID_TSENE_READ_RESP = 0x3D, }; @@ -154,4 +170,25 @@ enum { FBNIC_FW_OWNERSHIP_FLAG = 0x0, FBNIC_FW_OWNERSHIP_MSG_MAX }; + +enum { + FBNIC_FW_START_UPGRADE_ERROR = 0x0, + FBNIC_FW_START_UPGRADE_SECTION = 0x1, + FBNIC_FW_START_UPGRADE_IMAGE_LENGTH = 0x2, + FBNIC_FW_START_UPGRADE_MSG_MAX +}; + +enum { + FBNIC_FW_WRITE_CHUNK_OFFSET = 0x0, + FBNIC_FW_WRITE_CHUNK_LENGTH = 0x1, + FBNIC_FW_WRITE_CHUNK_DATA = 0x2, + FBNIC_FW_WRITE_CHUNK_ERROR = 0x3, + FBNIC_FW_WRITE_CHUNK_MSG_MAX +}; + +enum { + FBNIC_FW_FINISH_UPGRADE_ERROR = 0x0, + FBNIC_FW_FINISH_UPGRADE_MSG_MAX +}; + #endif /* _FBNIC_FW_H_ */ -- 2.51.0 From 82534f446daa0d2a995c9d64697add7265dff625 Mon Sep 17 00:00:00 2001 From: Lee Trager Date: Mon, 12 May 2025 11:54:01 -0700 Subject: [PATCH 07/16] eth: fbnic: Add devlink dev flash support Add support to update the CMRT and control firmware as well as the UEFI driver on fbnic using devlink dev flash. Make sure the shutdown / quiescence paths like suspend take the devlink lock to prevent them from interrupting the FW flashing process. Signed-off-by: Lee Trager Signed-off-by: Jakub Kicinski Reviewed-by: Jacob Keller Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250512190109.2475614-6-lee@trager.us Signed-off-by: Paolo Abeni --- .../device_drivers/ethernet/meta/fbnic.rst | 11 + drivers/net/ethernet/meta/Kconfig | 1 + .../net/ethernet/meta/fbnic/fbnic_devlink.c | 260 +++++++++++++++++- drivers/net/ethernet/meta/fbnic/fbnic_fw.h | 9 + drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 9 + 5 files changed, 289 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst index 3483e498c08e..f8592dec8851 100644 --- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst +++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst @@ -28,6 +28,17 @@ devlink dev info provides version information for all three components. In addition to the version the hg commit hash of the build is included as a separate entry. +Upgrading Firmware +------------------ + +fbnic supports updating firmware using signed PLDM images with devlink dev +flash. PLDM images are written into the flash. Flashing does not interrupt +the operation of the device. + +On host boot the latest UEFI driver is always used, no explicit activation +is required. Firmware activation is required to run new control firmware. cmrt +firmware can only be activated by power cycling the NIC. + Statistics ---------- diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 831921b9d4d5..3ba527514f1e 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -27,6 +27,7 @@ config FBNIC select NET_DEVLINK select PAGE_POOL select PHYLINK + select PLDMFW help This driver supports Meta Platforms Host Network Interface. diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index 0072d612215e..71d9461a0d1b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -3,10 +3,12 @@ #include #include +#include #include #include #include "fbnic.h" +#include "fbnic_tlv.h" #define FBNIC_SN_STR_LEN 24 @@ -109,8 +111,264 @@ static int fbnic_devlink_info_get(struct devlink *devlink, return 0; } +static bool +fbnic_pldm_match_record(struct pldmfw *context, struct pldmfw_record *record) +{ + struct pldmfw_desc_tlv *desc; + u32 anti_rollback_ver = 0; + struct devlink *devlink; + struct fbnic_dev *fbd; + struct pci_dev *pdev; + + /* First, use the standard PCI matching function */ + if (!pldmfw_op_pci_match_record(context, record)) + return false; + + pdev = to_pci_dev(context->dev); + fbd = pci_get_drvdata(pdev); + devlink = priv_to_devlink(fbd); + + /* If PCI match is successful, check for vendor-specific descriptors */ + list_for_each_entry(desc, &record->descs, entry) { + if (desc->type != PLDM_DESC_ID_VENDOR_DEFINED) + continue; + + if (desc->size < 21 || desc->data[0] != 1 || + desc->data[1] != 15) + continue; + + if (memcmp(desc->data + 2, "AntiRollbackVer", 15) != 0) + continue; + + anti_rollback_ver = get_unaligned_le32(desc->data + 17); + break; + } + + /* Compare versions and return error if they do not match */ + if (anti_rollback_ver < fbd->fw_cap.anti_rollback_version) { + char buf[128]; + + snprintf(buf, sizeof(buf), + "New firmware anti-rollback version (0x%x) is older than device version (0x%x)!", + anti_rollback_ver, fbd->fw_cap.anti_rollback_version); + devlink_flash_update_status_notify(devlink, buf, + "Anti-Rollback", 0, 0); + + return false; + } + + return true; +} + +static int +fbnic_flash_start(struct fbnic_dev *fbd, struct pldmfw_component *component) +{ + struct fbnic_fw_completion *cmpl; + int err; + + cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + if (!cmpl) + return -ENOMEM; + + fbnic_fw_init_cmpl(cmpl, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ); + err = fbnic_fw_xmit_fw_start_upgrade(fbd, cmpl, + component->identifier, + component->component_size); + if (err) + goto cmpl_free; + + /* Wait for firmware to ack firmware upgrade start */ + if (wait_for_completion_timeout(&cmpl->done, 10 * HZ)) + err = cmpl->result; + else + err = -ETIMEDOUT; + + fbnic_fw_clear_cmpl(fbd, cmpl); +cmpl_free: + fbnic_fw_put_cmpl(cmpl); + + return err; +} + +static int +fbnic_flash_component(struct pldmfw *context, + struct pldmfw_component *component) +{ + const u8 *data = component->component_data; + const u32 size = component->component_size; + struct fbnic_fw_completion *cmpl; + const char *component_name; + struct devlink *devlink; + struct fbnic_dev *fbd; + struct pci_dev *pdev; + u32 offset = 0; + u32 length = 0; + char buf[32]; + int err; + + pdev = to_pci_dev(context->dev); + fbd = pci_get_drvdata(pdev); + devlink = priv_to_devlink(fbd); + + switch (component->identifier) { + case QSPI_SECTION_CMRT: + component_name = "boot1"; + break; + case QSPI_SECTION_CONTROL_FW: + component_name = "boot2"; + break; + case QSPI_SECTION_OPTION_ROM: + component_name = "option-rom"; + break; + default: + snprintf(buf, sizeof(buf), "Unknown component ID %u!", + component->identifier); + devlink_flash_update_status_notify(devlink, buf, NULL, 0, + size); + return -EINVAL; + } + + /* Once firmware receives the request to start upgrading it responds + * with two messages: + * 1. An ACK that it received the message and possible error code + * indicating that an upgrade is not currently possible. + * 2. A request for the first chunk of data + * + * Setup completions for write before issuing the start message so + * the driver can catch both messages. + */ + cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + if (!cmpl) + return -ENOMEM; + + fbnic_fw_init_cmpl(cmpl, FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ); + err = fbnic_mbx_set_cmpl(fbd, cmpl); + if (err) + goto cmpl_free; + + devlink_flash_update_timeout_notify(devlink, "Initializing", + component_name, 15); + err = fbnic_flash_start(fbd, component); + if (err) + goto err_no_msg; + + while (offset < size) { + if (!wait_for_completion_timeout(&cmpl->done, 15 * HZ)) { + err = -ETIMEDOUT; + break; + } + + err = cmpl->result; + if (err) + break; + + /* Verify firmware is requesting the next chunk in the seq. */ + if (cmpl->u.fw_update.offset != offset + length) { + err = -EFAULT; + break; + } + + offset = cmpl->u.fw_update.offset; + length = cmpl->u.fw_update.length; + + if (length > TLV_MAX_DATA || offset + length > size) { + err = -EFAULT; + break; + } + + devlink_flash_update_status_notify(devlink, "Flashing", + component_name, + offset, size); + + /* Mailbox will set length to 0 once it receives the finish + * message. + */ + if (!length) + continue; + + reinit_completion(&cmpl->done); + err = fbnic_fw_xmit_fw_write_chunk(fbd, data, offset, length, + 0); + if (err) + break; + } + + if (err) { + fbnic_fw_xmit_fw_write_chunk(fbd, NULL, 0, 0, err); +err_no_msg: + snprintf(buf, sizeof(buf), "Mailbox encountered error %d!", + err); + devlink_flash_update_status_notify(devlink, buf, + component_name, 0, 0); + } + + fbnic_fw_clear_cmpl(fbd, cmpl); +cmpl_free: + fbnic_fw_put_cmpl(cmpl); + + return err; +} + +static const struct pldmfw_ops fbnic_pldmfw_ops = { + .match_record = fbnic_pldm_match_record, + .flash_component = fbnic_flash_component, +}; + +static int +fbnic_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_priv(devlink); + const struct firmware *fw = params->fw; + struct device *dev = fbd->dev; + struct pldmfw context; + char *err_msg; + int err; + + context.ops = &fbnic_pldmfw_ops; + context.dev = dev; + + err = pldmfw_flash_image(&context, fw); + if (err) { + switch (err) { + case -EINVAL: + err_msg = "Invalid image"; + break; + case -EOPNOTSUPP: + err_msg = "Unsupported image"; + break; + case -ENOMEM: + err_msg = "Out of memory"; + break; + case -EFAULT: + err_msg = "Invalid header"; + break; + case -ENOENT: + err_msg = "No matching record"; + break; + case -ENODEV: + err_msg = "No matching device"; + break; + case -ETIMEDOUT: + err_msg = "Timed out waiting for reply"; + break; + default: + err_msg = "Unknown error"; + break; + } + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Failed to flash PLDM Image: %s (error: %d)", + err_msg, err); + } + + return err; +} + static const struct devlink_ops fbnic_devlink_ops = { - .info_get = fbnic_devlink_info_get, + .info_get = fbnic_devlink_info_get, + .flash_update = fbnic_devlink_flash_update, }; void fbnic_devlink_free(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index 0ab6ae3859e4..6baac10fd688 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -100,6 +100,15 @@ do { \ #define fbnic_mk_fw_ver_str(_rev_id, _str) \ fbnic_mk_full_fw_ver_str(_rev_id, "", "", _str, sizeof(_str)) +enum { + QSPI_SECTION_CMRT = 0, + QSPI_SECTION_CONTROL_FW = 1, + QSPI_SECTION_UCODE = 2, + QSPI_SECTION_OPTION_ROM = 3, + QSPI_SECTION_USER = 4, + QSPI_SECTION_INVALID, +}; + #define FW_HEARTBEAT_PERIOD (10 * HZ) enum { diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 70a852b3e99d..249d3ef862d5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "fbnic.h" #include "fbnic_drvinfo.h" @@ -388,8 +389,12 @@ static int fbnic_pm_suspend(struct device *dev) rtnl_unlock(); null_uc_addr: + devl_lock(priv_to_devlink(fbd)); + fbnic_fw_free_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); + /* Free the IRQs so they aren't trying to occupy sleeping CPUs */ fbnic_free_irqs(fbd); @@ -420,11 +425,15 @@ static int __fbnic_pm_resume(struct device *dev) fbd->mac->init_regs(fbd); + devl_lock(priv_to_devlink(fbd)); + /* Re-enable mailbox */ err = fbnic_fw_request_mbx(fbd); if (err) goto err_free_irqs; + devl_unlock(priv_to_devlink(fbd)); + /* No netdev means there isn't a network interface to bring up */ if (fbnic_init_failure(fbd)) return 0; -- 2.51.0 From c16608005ccb99fbde3a4cd96eab28e16f148abf Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 13 May 2025 11:19:22 +0300 Subject: [PATCH 08/16] net: Look for bonding slaves in the bond's network namespace Update the for_each_netdev_in_bond_rcu macro to iterate through network devices in the bond's network namespace instead of always using init_net. This change is safe because: 1. **Bond-Slave Namespace Relationship**: A bond device and its slaves must reside in the same network namespace. The bond device's namespace is established at creation time and cannot change. 2. **Slave Movement Implications**: Any attempt to move a slave device to a different namespace automatically removes it from the bond, as per kernel networking stack rules. This maintains the invariant that slaves must exist in the same namespace as their bond. This change is part of an effort to enable Link Aggregation (LAG) to work properly inside custom network namespaces. Previously, the macro would only find slave devices in the initial network namespace, preventing proper bonding functionality in custom namespaces. Signed-off-by: Shay Drory Signed-off-by: Mark Bloch Link: https://patch.msgid.link/20250513081922.525716-1-mbloch@nvidia.com Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 32a1e41636a9..9e3a2d8452d6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3268,7 +3268,7 @@ int call_netdevice_notifiers_info(unsigned long val, #define for_each_netdev_continue_rcu(net, d) \ list_for_each_entry_continue_rcu(d, &(net)->dev_base_head, dev_list) #define for_each_netdev_in_bond_rcu(bond, slave) \ - for_each_netdev_rcu(&init_net, slave) \ + for_each_netdev_rcu(dev_net_rcu(bond), slave) \ if (netdev_master_upper_dev_get_rcu(slave) == (bond)) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) -- 2.51.0 From aa2263b3c3e2286575ff286bb538847b4ab4ba49 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 13 May 2025 17:10:05 +0530 Subject: [PATCH 09/16] octeontx2-af: convert dev_dbg to tracepoint in mbox Use tracepoint instead of dev_dbg since the entire mailbox code uses tracepoints for debugging. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1747136408-30685-2-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 3 +-- drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 1e5aa5397504..5547d20310df 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -188,14 +188,13 @@ int otx2_mbox_wait_for_rsp(struct otx2_mbox *mbox, int devid) { unsigned long timeout = jiffies + msecs_to_jiffies(MBOX_RSP_TIMEOUT); struct otx2_mbox_dev *mdev = &mbox->dev[devid]; - struct device *sender = &mbox->pdev->dev; while (!time_after(jiffies, timeout)) { if (mdev->num_msgs == mdev->msgs_acked) return 0; usleep_range(800, 1000); } - dev_dbg(sender, "timed out while waiting for rsp\n"); + trace_otx2_msg_wait_rsp(mbox->pdev); return -EIO; } EXPORT_SYMBOL(otx2_mbox_wait_for_rsp); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index 5704520f9b02..6b5a14dcb643 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -95,6 +95,17 @@ TRACE_EVENT(otx2_msg_process, otx2_mbox_id2name(__entry->id), __entry->err) ); +TRACE_EVENT(otx2_msg_wait_rsp, + TP_PROTO(const struct pci_dev *pdev), + TP_ARGS(pdev), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + ), + TP_fast_assign(__assign_str(dev) + ), + TP_printk("[%s] timed out while waiting for response\n", + __get_str(dev)) +); + #endif /* __RVU_TRACE_H */ #undef TRACE_INCLUDE_PATH -- 2.51.0 From ba7b63670312d4ad2ce5e4d43652c6b38d3fb56f Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 13 May 2025 17:10:06 +0530 Subject: [PATCH 10/16] octeontx2-af: Display names for CPT and UP messages Mailbox UP messages and CPT messages names are not being displayed with their names in trace log files. Add those messages too in otx2_mbox_id2name. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1747136408-30685-3-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/marvell/octeontx2/af/mbox.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 5547d20310df..5c457e46977e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -444,6 +444,14 @@ const char *otx2_mbox_id2name(u16 id) #define M(_name, _id, _1, _2, _3) case _id: return # _name; MBOX_MESSAGES #undef M + +#define M(_name, _id, _1, _2, _3) case _id: return # _name; + MBOX_UP_CGX_MESSAGES +#undef M + +#define M(_name, _id, _1, _2, _3) case _id: return # _name; + MBOX_UP_CPT_MESSAGES +#undef M default: return "INVALID ID"; } -- 2.51.0 From 27d27a06b48e0781fd9e207646eba0e14ee0e439 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 13 May 2025 17:10:07 +0530 Subject: [PATCH 11/16] octeontx2: Add pcifunc also to mailbox tracepoints This patch adds pcifunc which represents PF and VF device to the tracepoints otx2_msg_alloc, otx2_msg_send, otx2_msg_process so that it is easier to correlate which device allocated the message, which device forwarded it and which device processed that message. Also add message id in otx2_msg_send tracepoint to check which message is sent at any point of time from a device. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1747136408-30685-4-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../net/ethernet/marvell/octeontx2/af/mbox.c | 6 +++- .../net/ethernet/marvell/octeontx2/af/rvu.c | 2 +- .../ethernet/marvell/octeontx2/af/rvu_cgx.c | 2 +- .../ethernet/marvell/octeontx2/af/rvu_trace.h | 36 ++++++++++++------- .../marvell/octeontx2/nic/otx2_common.h | 4 ++- .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 +- .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 +- 7 files changed, 37 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c index 5c457e46977e..7d21905deed8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.c @@ -218,6 +218,7 @@ static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) struct otx2_mbox_dev *mdev = &mbox->dev[devid]; struct mbox_hdr *tx_hdr, *rx_hdr; void *hw_mbase = mdev->hwbase; + struct mbox_msghdr *msg; u64 intr_val; tx_hdr = hw_mbase + mbox->tx_start; @@ -250,7 +251,10 @@ static void otx2_mbox_msg_send_data(struct otx2_mbox *mbox, int devid, u64 data) tx_hdr->num_msgs = mdev->num_msgs; rx_hdr->num_msgs = 0; - trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size); + msg = (struct mbox_msghdr *)(hw_mbase + mbox->tx_start + msgs_offset); + + trace_otx2_msg_send(mbox->pdev, tx_hdr->num_msgs, tx_hdr->msg_size, + msg->id, msg->pcifunc); spin_unlock(&mdev->mbox_lock); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 6575c422635b..511eb5b2a2d4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2173,7 +2173,7 @@ static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, if (rsp && err) \ rsp->hdr.rc = err; \ \ - trace_otx2_msg_process(mbox->pdev, _id, err); \ + trace_otx2_msg_process(mbox->pdev, _id, err, req->pcifunc); \ return rsp ? err : -ENOMEM; \ } MBOX_MESSAGES diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 992fa0b82e8d..38db06b1784c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -34,7 +34,7 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ - trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req)); \ + trace_otx2_msg_alloc(rvu->pdev, _id, sizeof(*req), 0); \ return req; \ } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index 6b5a14dcb643..abbdb0361fb8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -18,33 +18,42 @@ #include "mbox.h" TRACE_EVENT(otx2_msg_alloc, - TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size), - TP_ARGS(pdev, id, size), + TP_PROTO(const struct pci_dev *pdev, u16 id, u64 size, u16 pcifunc), + TP_ARGS(pdev, id, size, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, id) __field(u64, size) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->id = id; __entry->size = size; + __entry->pcifunc = pcifunc; ), - TP_printk("[%s] msg:(%s) size:%lld\n", __get_str(dev), - otx2_mbox_id2name(__entry->id), __entry->size) + TP_printk("[%s] msg:(%s) size:%lld pcifunc:0x%x\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), __entry->size, + __entry->pcifunc) ); TRACE_EVENT(otx2_msg_send, - TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size), - TP_ARGS(pdev, num_msgs, msg_size), + TP_PROTO(const struct pci_dev *pdev, u16 num_msgs, u64 msg_size, + u16 id, u16 pcifunc), + TP_ARGS(pdev, num_msgs, msg_size, id, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, num_msgs) __field(u64, msg_size) + __field(u16, id) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->num_msgs = num_msgs; __entry->msg_size = msg_size; + __entry->id = id; + __entry->pcifunc = pcifunc; ), - TP_printk("[%s] sent %d msg(s) of size:%lld\n", __get_str(dev), - __entry->num_msgs, __entry->msg_size) + TP_printk("[%s] sent %d msg(s) of size:%lld msg:(%s) pcifunc:0x%x\n", + __get_str(dev), __entry->num_msgs, __entry->msg_size, + otx2_mbox_id2name(__entry->id), __entry->pcifunc) ); TRACE_EVENT(otx2_msg_check, @@ -81,18 +90,21 @@ TRACE_EVENT(otx2_msg_interrupt, ); TRACE_EVENT(otx2_msg_process, - TP_PROTO(const struct pci_dev *pdev, u16 id, int err), - TP_ARGS(pdev, id, err), + TP_PROTO(const struct pci_dev *pdev, u16 id, int err, u16 pcifunc), + TP_ARGS(pdev, id, err, pcifunc), TP_STRUCT__entry(__string(dev, pci_name(pdev)) __field(u16, id) __field(int, err) + __field(u16, pcifunc) ), TP_fast_assign(__assign_str(dev); __entry->id = id; __entry->err = err; + __entry->pcifunc = pcifunc; ), - TP_printk("[%s] msg:(%s) error:%d\n", __get_str(dev), - otx2_mbox_id2name(__entry->id), __entry->err) + TP_printk("[%s] msg:(%s) error:%d pcifunc:0x%x\n", __get_str(dev), + otx2_mbox_id2name(__entry->id), + __entry->err, __entry->pcifunc) ); TRACE_EVENT(otx2_msg_wait_rsp, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 7e3ddb0bee12..d188936f89bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -871,6 +871,7 @@ static struct _req_type __maybe_unused \ *otx2_mbox_alloc_msg_ ## _fn_name(struct mbox *mbox) \ { \ struct _req_type *req; \ + u16 pcifunc = mbox->pfvf->pcifunc; \ \ req = (struct _req_type *)otx2_mbox_alloc_msg_rsp( \ &mbox->mbox, 0, sizeof(struct _req_type), \ @@ -879,7 +880,8 @@ static struct _req_type __maybe_unused \ return NULL; \ req->hdr.sig = OTX2_MBOX_REQ_SIG; \ req->hdr.id = _id; \ - trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req)); \ + req->hdr.pcifunc = pcifunc; \ + trace_otx2_msg_alloc(mbox->mbox.pdev, _id, sizeof(*req), pcifunc); \ return req; \ } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 0aee8e3861f3..1bbc17b34d7c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -473,7 +473,7 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) goto inval_msg; /* Set VF's number in each of the msg */ - msg->pcifunc &= RVU_PFVF_FUNC_MASK; + msg->pcifunc &= ~RVU_PFVF_FUNC_MASK; msg->pcifunc |= (vf_idx + 1) & RVU_PFVF_FUNC_MASK; offset = msg->next_msgoff; } @@ -3285,6 +3285,7 @@ static void otx2_vf_link_event_task(struct work_struct *work) req = (struct cgx_link_info_msg *)msghdr; req->hdr.id = MBOX_MSG_CGX_LINK_EVENT; req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = pf->pcifunc; memcpy(&req->link_info, &pf->linfo, sizeof(req->link_info)); otx2_mbox_wait_for_zero(&pf->mbox_pfvf[0].mbox_up, vf_idx); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index fb4da816d218..ba4ae6d9c569 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -136,7 +136,7 @@ static int otx2vf_process_mbox_msg_up(struct otx2_nic *vf, rsp->hdr.id = MBOX_MSG_CGX_LINK_EVENT; rsp->hdr.sig = OTX2_MBOX_RSP_SIG; - rsp->hdr.pcifunc = 0; + rsp->hdr.pcifunc = req->pcifunc; rsp->hdr.rc = 0; err = otx2_mbox_up_handler_cgx_link_event( vf, (struct cgx_link_info_msg *)req, rsp); -- 2.51.0 From fa00077d8fd6728a7cda48d0795d885ea2006a92 Mon Sep 17 00:00:00 2001 From: Subbaraya Sundeep Date: Tue, 13 May 2025 17:10:08 +0530 Subject: [PATCH 12/16] octeontx2: Add new tracepoint otx2_msg_status Apart from netdev interface Octeontx2 PF does the following: 1. Sends its own requests to AF and receives responses from AF. 2. Receives async messages from AF. 3. Forwards VF requests to AF, sends respective responses from AF to VFs. 4. Sends async messages to VFs. This patch adds new tracepoint otx2_msg_status to display the status of PF wrt mailbox handling. Signed-off-by: Subbaraya Sundeep Link: https://patch.msgid.link/1747136408-30685-5-git-send-email-sbhatta@marvell.com Signed-off-by: Paolo Abeni --- .../ethernet/marvell/octeontx2/af/rvu_trace.c | 1 + .../ethernet/marvell/octeontx2/af/rvu_trace.h | 15 +++++++++++++++ .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c index 775fd4c35794..5f69380ccc82 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.c @@ -11,3 +11,4 @@ EXPORT_TRACEPOINT_SYMBOL(otx2_msg_alloc); EXPORT_TRACEPOINT_SYMBOL(otx2_msg_interrupt); EXPORT_TRACEPOINT_SYMBOL(otx2_msg_process); +EXPORT_TRACEPOINT_SYMBOL(otx2_msg_status); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h index abbdb0361fb8..db02b4d6ed4b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_trace.h @@ -118,6 +118,21 @@ TRACE_EVENT(otx2_msg_wait_rsp, __get_str(dev)) ); +TRACE_EVENT(otx2_msg_status, + TP_PROTO(const struct pci_dev *pdev, const char *msg, u16 num_msgs), + TP_ARGS(pdev, msg, num_msgs), + TP_STRUCT__entry(__string(dev, pci_name(pdev)) + __string(str, msg) + __field(u16, num_msgs) + ), + TP_fast_assign(__assign_str(dev); + __assign_str(str); + __entry->num_msgs = num_msgs; + ), + TP_printk("[%s] %s num_msgs:%d\n", __get_str(dev), + __get_str(str), __entry->num_msgs) +); + #endif /* __RVU_TRACE_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 1bbc17b34d7c..d79b4b30176d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -465,6 +465,9 @@ static void otx2_pfvf_mbox_handler(struct work_struct *work) offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-VF down queue handler(forwarding)", + vf_mbox->num_msgs); + for (id = 0; id < vf_mbox->num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + mbox->rx_start + offset); @@ -503,6 +506,9 @@ static void otx2_pfvf_mbox_up_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-VF up queue handler(response)", + vf_mbox->up_num_msgs); + for (id = 0; id < vf_mbox->up_num_msgs; id++) { msg = mdev->mbase + offset; @@ -819,6 +825,9 @@ static void otx2_pfaf_mbox_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); pf = af_mbox->pfvf; + trace_otx2_msg_status(pf->pdev, "PF-AF down queue handler(response)", + num_msgs); + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); otx2_process_pfaf_mbox_msg(pf, msg); @@ -974,6 +983,9 @@ static void otx2_pfaf_mbox_up_handler(struct work_struct *work) offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + trace_otx2_msg_status(pf->pdev, "PF-AF up queue handler(notification)", + num_msgs); + for (id = 0; id < num_msgs; id++) { msg = (struct mbox_msghdr *)(mdev->mbase + offset); @@ -1023,6 +1035,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) trace_otx2_msg_interrupt(pf->pdev, "UP message from AF to PF", BIT_ULL(0)); + + trace_otx2_msg_status(pf->pdev, "PF-AF up work queued(interrupt)", + hdr->num_msgs); } if (mbox_data & MBOX_DOWN_MSG) { @@ -1039,6 +1054,9 @@ static irqreturn_t otx2_pfaf_mbox_intr_handler(int irq, void *pf_irq) trace_otx2_msg_interrupt(pf->pdev, "DOWN reply from AF to PF", BIT_ULL(0)); + + trace_otx2_msg_status(pf->pdev, "PF-AF down work queued(interrupt)", + hdr->num_msgs); } return IRQ_HANDLED; -- 2.51.0 From 32471b2f481dea8624f27669d36ffd131d24b732 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:22 +0200 Subject: [PATCH 13/16] net: page_pool: Don't recycle into cache on PREEMPT_RT With preemptible softirq and no per-CPU locking in local_bh_disable() on PREEMPT_RT the consumer can be preempted while a skb is returned. Avoid the race by disabling the recycle into the cache on PREEMPT_RT. Cc: Jesper Dangaard Brouer Cc: Ilias Apalodimas Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250512092736.229935-2-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- net/core/page_pool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 2b7684865941..974f3eef2efa 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -839,6 +839,10 @@ static bool page_pool_napi_local(const struct page_pool *pool) const struct napi_struct *napi; u32 cpuid; + /* On PREEMPT_RT the softirq can be preempted by the consumer */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + return false; + if (unlikely(!in_softirq())) return false; -- 2.51.0 From c99dac52ffad5df88c9c52ab2008b182743bdcc1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:23 +0200 Subject: [PATCH 14/16] net: dst_cache: Use nested-BH locking for dst_cache::cache dst_cache::cache is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250512092736.229935-3-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- net/core/dst_cache.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index 70c634b9e7b0..93a04d18e505 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -17,6 +17,7 @@ struct dst_cache_pcpu { unsigned long refresh_ts; struct dst_entry *dst; + local_lock_t bh_lock; u32 cookie; union { struct in_addr in_saddr; @@ -65,10 +66,15 @@ fail: struct dst_entry *dst_cache_get(struct dst_cache *dst_cache) { + struct dst_entry *dst; + if (!dst_cache->cache) return NULL; - return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); + local_lock_nested_bh(&dst_cache->cache->bh_lock); + dst = dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache)); + local_unlock_nested_bh(&dst_cache->cache->bh_lock); + return dst; } EXPORT_SYMBOL_GPL(dst_cache_get); @@ -80,12 +86,16 @@ struct rtable *dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr) if (!dst_cache->cache) return NULL; + local_lock_nested_bh(&dst_cache->cache->bh_lock); idst = this_cpu_ptr(dst_cache->cache); dst = dst_cache_per_cpu_get(dst_cache, idst); - if (!dst) + if (!dst) { + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return NULL; + } *saddr = idst->in_saddr.s_addr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return dst_rtable(dst); } EXPORT_SYMBOL_GPL(dst_cache_get_ip4); @@ -98,9 +108,11 @@ void dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst, if (!dst_cache->cache) return; + local_lock_nested_bh(&dst_cache->cache->bh_lock); idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(idst, dst, 0); idst->in_saddr.s_addr = saddr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); } EXPORT_SYMBOL_GPL(dst_cache_set_ip4); @@ -113,10 +125,13 @@ void dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst, if (!dst_cache->cache) return; + local_lock_nested_bh(&dst_cache->cache->bh_lock); + idst = this_cpu_ptr(dst_cache->cache); dst_cache_per_cpu_dst_set(idst, dst, rt6_get_cookie(dst_rt6_info(dst))); idst->in6_saddr = *saddr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); } EXPORT_SYMBOL_GPL(dst_cache_set_ip6); @@ -129,12 +144,17 @@ struct dst_entry *dst_cache_get_ip6(struct dst_cache *dst_cache, if (!dst_cache->cache) return NULL; + local_lock_nested_bh(&dst_cache->cache->bh_lock); + idst = this_cpu_ptr(dst_cache->cache); dst = dst_cache_per_cpu_get(dst_cache, idst); - if (!dst) + if (!dst) { + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return NULL; + } *saddr = idst->in6_saddr; + local_unlock_nested_bh(&dst_cache->cache->bh_lock); return dst; } EXPORT_SYMBOL_GPL(dst_cache_get_ip6); @@ -142,10 +162,14 @@ EXPORT_SYMBOL_GPL(dst_cache_get_ip6); int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp) { + unsigned int i; + dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu, gfp | __GFP_ZERO); if (!dst_cache->cache) return -ENOMEM; + for_each_possible_cpu(i) + local_lock_init(&per_cpu_ptr(dst_cache->cache, i)->bh_lock); dst_cache_reset(dst_cache); return 0; -- 2.51.0 From 1c0829788a6e6e165846b9bedd0b908ef16260b6 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:24 +0200 Subject: [PATCH 15/16] ipv4/route: Use this_cpu_inc() for stats on PREEMPT_RT The statistics are incremented with raw_cpu_inc() assuming it always happens with bottom half disabled. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this is no longer true. Use this_cpu_inc() on PREEMPT_RT for the increment to not worry about preemption. Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250512092736.229935-4-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- net/ipv4/route.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 507b2e5dec50..fccb05fb3a79 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -189,7 +189,11 @@ const __u8 ip_tos2prio[16] = { EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); +#ifndef CONFIG_PREEMPT_RT #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) +#else +#define RT_CACHE_STAT_INC(field) this_cpu_inc(rt_cache_stat.field) +#endif #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) -- 2.51.0 From bc57eda646cea6a9077ba1b781bf64bc0ab836a7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 May 2025 11:27:25 +0200 Subject: [PATCH 16/16] ipv6: sr: Use nested-BH locking for hmac_storage hmac_storage is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. This change adds only lockdep coverage and does not alter the functional behaviour for !PREEMPT_RT. Cc: David Ahern Signed-off-by: Sebastian Andrzej Siewior Link: https://patch.msgid.link/20250512092736.229935-5-bigeasy@linutronix.de Signed-off-by: Paolo Abeni --- net/ipv6/seg6_hmac.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index bbf5b84a70fc..f78ecb6ad838 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -40,7 +40,14 @@ #include #include -static DEFINE_PER_CPU(char [SEG6_HMAC_RING_SIZE], hmac_ring); +struct hmac_storage { + local_lock_t bh_lock; + char hmac_ring[SEG6_HMAC_RING_SIZE]; +}; + +static DEFINE_PER_CPU(struct hmac_storage, hmac_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static int seg6_hmac_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) { @@ -187,7 +194,8 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, */ local_bh_disable(); - ring = this_cpu_ptr(hmac_ring); + local_lock_nested_bh(&hmac_storage.bh_lock); + ring = this_cpu_ptr(hmac_storage.hmac_ring); off = ring; /* source address */ @@ -212,6 +220,7 @@ int seg6_hmac_compute(struct seg6_hmac_info *hinfo, struct ipv6_sr_hdr *hdr, dgsize = __do_hmac(hinfo, ring, plen, tmp_out, SEG6_HMAC_MAX_DIGESTSIZE); + local_unlock_nested_bh(&hmac_storage.bh_lock); local_bh_enable(); if (dgsize < 0) -- 2.51.0