From 551844f26da2a9f76c0a698baaffa631d1178645 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 30 Dec 2024 19:12:31 +0100 Subject: [PATCH 01/16] mptcp: don't always assume copied data in mptcp_cleanup_rbuf() Under some corner cases the MPTCP protocol can end up invoking mptcp_cleanup_rbuf() when no data has been copied, but such helper assumes the opposite condition. Explicitly drop such assumption and perform the costly call only when strictly needed - before releasing the msk socket lock. Fixes: fd8976790a6c ("mptcp: be careful on MPTCP-level ack.") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-2-8608af434ceb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 27afdb7e2071..5307fff9d995 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -528,13 +528,13 @@ static void mptcp_send_ack(struct mptcp_sock *msk) mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow)); } -static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) +static void mptcp_subflow_cleanup_rbuf(struct sock *ssk, int copied) { bool slow; slow = lock_sock_fast(ssk); if (tcp_can_send_ack(ssk)) - tcp_cleanup_rbuf(ssk, 1); + tcp_cleanup_rbuf(ssk, copied); unlock_sock_fast(ssk, slow); } @@ -551,7 +551,7 @@ static bool mptcp_subflow_could_cleanup(const struct sock *ssk, bool rx_empty) (ICSK_ACK_PUSHED2 | ICSK_ACK_PUSHED))); } -static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) +static void mptcp_cleanup_rbuf(struct mptcp_sock *msk, int copied) { int old_space = READ_ONCE(msk->old_wspace); struct mptcp_subflow_context *subflow; @@ -559,14 +559,14 @@ static void mptcp_cleanup_rbuf(struct mptcp_sock *msk) int space = __mptcp_space(sk); bool cleanup, rx_empty; - cleanup = (space > 0) && (space >= 
(old_space << 1)); - rx_empty = !__mptcp_rmem(sk); + cleanup = (space > 0) && (space >= (old_space << 1)) && copied; + rx_empty = !__mptcp_rmem(sk) && copied; mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (cleanup || mptcp_subflow_could_cleanup(ssk, rx_empty)) - mptcp_subflow_cleanup_rbuf(ssk); + mptcp_subflow_cleanup_rbuf(ssk, copied); } } @@ -2220,9 +2220,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, copied += bytes_read; - /* be sure to advertise window change */ - mptcp_cleanup_rbuf(msk); - if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk)) continue; @@ -2271,6 +2268,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } pr_debug("block timeout %ld\n", timeo); + mptcp_cleanup_rbuf(msk, copied); err = sk_wait_data(sk, &timeo, NULL); if (err < 0) { err = copied ? : err; @@ -2278,6 +2276,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } + mptcp_cleanup_rbuf(msk, copied); + out_err: if (cmsg_flags && copied >= 0) { if (cmsg_flags & MPTCP_CMSG_TS) -- 2.51.0 From 56b824eb49d6258aa0bad09a406ceac3f643cdae Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 30 Dec 2024 19:12:32 +0100 Subject: [PATCH 02/16] mptcp: prevent excessive coalescing on receive Currently the skb size after coalescing is only limited by the skb layout (the skb must not carry frag_list). A single coalesced skb covering several MSS can potentially fill the receive buffer completely. In such a case, the send window will drop to zero until the receive buffer is empty again, badly affecting throughput. 
Fixes: 8268ed4c9d19 ("mptcp: introduce and use mptcp_try_coalesce()") Cc: stable@vger.kernel.org # please delay 2 weeks after 6.13-final release Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241230-net-mptcp-rbuf-fixes-v1-3-8608af434ceb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5307fff9d995..1b2e7cbb577f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -136,6 +136,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, int delta; if (MPTCP_SKB_CB(from)->offset || + ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) || !skb_try_coalesce(to, from, &fragstolen, &delta)) return false; -- 2.51.0 From 9facce84f4062f782ebde18daa7006a23d40b607 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Mon, 23 Dec 2024 20:45:49 +0530 Subject: [PATCH 03/16] net: ti: icssg-prueth: Fix firmware load sequence. Timesync related operations are run in PRU0 cores for both ICSSG SLICE0 and SLICE1. Currently whenever any ICSSG interface comes up we load the respective firmwares to PRU cores and whenever an interface goes down, we stop the respective cores. Due to this, when SLICE0 goes down while SLICE1 is still active, PRU0 firmwares are unloaded and PRU0 core is stopped. This results in clock jump for SLICE1 interface as the timesync related operations are no longer running. As there are interdependencies between SLICE0 and SLICE1 firmwares, fix this by running both PRU0 and PRU1 firmwares as long as at least 1 ICSSG interface is up. Add new flag in prueth struct to check if all firmwares are running and remove the old flag (fw_running). Use emacs_initialized as reference count to load the firmwares for the first and last interface up/down. 
Moving init_emac_mode and fw_offload_mode API outside of icssg_config to icssg_common_start API as they need to be called only once per firmware boot. Change prueth_emac_restart() to return error code and add error prints inside the caller of this functions in case of any failures. Move prueth_emac_stop() from common to sr1 driver. sr1 and sr2 drivers have different logic handling for stopping the firmwares. While sr1 driver is dependent on emac structure to stop the corresponding pru cores for that slice, for sr2 all the pru cores of both the slices are stopped and is not dependent on emac. So the prueth_emac_stop() function is no longer common and can be moved to sr1 driver. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: MD Danish Anwar Signed-off-by: Meghana Malladi Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/icssg/icssg_common.c | 25 -- drivers/net/ethernet/ti/icssg/icssg_config.c | 41 ++- drivers/net/ethernet/ti/icssg/icssg_config.h | 1 + drivers/net/ethernet/ti/icssg/icssg_prueth.c | 261 ++++++++++++------ drivers/net/ethernet/ti/icssg/icssg_prueth.h | 5 +- .../net/ethernet/ti/icssg/icssg_prueth_sr1.c | 24 +- 6 files changed, 236 insertions(+), 121 deletions(-) diff --git a/drivers/net/ethernet/ti/icssg/icssg_common.c b/drivers/net/ethernet/ti/icssg/icssg_common.c index fdebeb2f84e0..74f0f200a89d 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_common.c +++ b/drivers/net/ethernet/ti/icssg/icssg_common.c @@ -855,31 +855,6 @@ irqreturn_t prueth_rx_irq(int irq, void *dev_id) } EXPORT_SYMBOL_GPL(prueth_rx_irq); -void prueth_emac_stop(struct prueth_emac *emac) -{ - struct prueth *prueth = emac->prueth; - int slice; - - switch (emac->port_id) { - case PRUETH_PORT_MII0: - slice = ICSS_SLICE0; - break; - case PRUETH_PORT_MII1: - slice = ICSS_SLICE1; - break; - default: - netdev_err(emac->ndev, "invalid port\n"); - return; - } - - emac->fw_running = 0; - if (!emac->is_sr1) - rproc_shutdown(prueth->txpru[slice]); - 
rproc_shutdown(prueth->rtu[slice]); - rproc_shutdown(prueth->pru[slice]); -} -EXPORT_SYMBOL_GPL(prueth_emac_stop); - void prueth_cleanup_tx_ts(struct prueth_emac *emac) { int i; diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index 5d2491c2943a..ddfd1c02a885 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -397,7 +397,7 @@ static int prueth_emac_buffer_setup(struct prueth_emac *emac) return 0; } -static void icssg_init_emac_mode(struct prueth *prueth) +void icssg_init_emac_mode(struct prueth *prueth) { /* When the device is configured as a bridge and it is being brought * back to the emac mode, the host mac address has to be set as 0. @@ -406,9 +406,6 @@ static void icssg_init_emac_mode(struct prueth *prueth) int i; u8 mac[ETH_ALEN] = { 0 }; - if (prueth->emacs_initialized) - return; - /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); @@ -423,15 +420,13 @@ static void icssg_init_emac_mode(struct prueth *prueth) /* Clear host MAC address */ icssg_class_set_host_mac_addr(prueth->miig_rt, mac); } +EXPORT_SYMBOL_GPL(icssg_init_emac_mode); -static void icssg_init_fw_offload_mode(struct prueth *prueth) +void icssg_init_fw_offload_mode(struct prueth *prueth) { u32 addr = prueth->shram.pa + EMAC_ICSSG_SWITCH_DEFAULT_VLAN_TABLE_OFFSET; int i; - if (prueth->emacs_initialized) - return; - /* Set VLAN TABLE address base */ regmap_update_bits(prueth->miig_rt, FDB_GEN_CFG1, SMEM_VLAN_OFFSET_MASK, addr << SMEM_VLAN_OFFSET); @@ -448,6 +443,7 @@ static void icssg_init_fw_offload_mode(struct prueth *prueth) icssg_class_set_host_mac_addr(prueth->miig_rt, prueth->hw_bridge_dev->dev_addr); icssg_set_pvid(prueth, prueth->default_vlan, PRUETH_PORT_HOST); } +EXPORT_SYMBOL_GPL(icssg_init_fw_offload_mode); int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) { @@ 
-455,11 +451,6 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) struct icssg_flow_cfg __iomem *flow_cfg; int ret; - if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) - icssg_init_fw_offload_mode(prueth); - else - icssg_init_emac_mode(prueth); - memset_io(config, 0, TAS_GATE_MASK_LIST0); icssg_miig_queues_init(prueth, slice); @@ -786,3 +777,27 @@ void icssg_set_pvid(struct prueth *prueth, u8 vid, u8 port) writel(pvid, prueth->shram.va + EMAC_ICSSG_SWITCH_PORT0_DEFAULT_VLAN_OFFSET); } EXPORT_SYMBOL_GPL(icssg_set_pvid); + +int emac_fdb_flow_id_updated(struct prueth_emac *emac) +{ + struct mgmt_cmd_rsp fdb_cmd_rsp = { 0 }; + int slice = prueth_emac_slice(emac); + struct mgmt_cmd fdb_cmd = { 0 }; + int ret; + + fdb_cmd.header = ICSSG_FW_MGMT_CMD_HEADER; + fdb_cmd.type = ICSSG_FW_MGMT_FDB_CMD_TYPE_RX_FLOW; + fdb_cmd.seqnum = ++(emac->prueth->icssg_hwcmdseq); + fdb_cmd.param = 0; + + fdb_cmd.param |= (slice << 4); + fdb_cmd.cmd_args[0] = 0; + + ret = icssg_send_fdb_msg(emac, &fdb_cmd, &fdb_cmd_rsp); + if (ret) + return ret; + + WARN_ON(fdb_cmd.seqnum != fdb_cmd_rsp.seqnum); + return fdb_cmd_rsp.status == 1 ? 
0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(emac_fdb_flow_id_updated); diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.h b/drivers/net/ethernet/ti/icssg/icssg_config.h index 92c2deaa3068..c884e9fa099e 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.h +++ b/drivers/net/ethernet/ti/icssg/icssg_config.h @@ -55,6 +55,7 @@ struct icssg_rxq_ctx { #define ICSSG_FW_MGMT_FDB_CMD_TYPE 0x03 #define ICSSG_FW_MGMT_CMD_TYPE 0x04 #define ICSSG_FW_MGMT_PKT 0x80000000 +#define ICSSG_FW_MGMT_FDB_CMD_TYPE_RX_FLOW 0x05 struct icssg_r30_cmd { u32 cmd[4]; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index c568c84a032b..d76fe6d05e10 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -164,11 +164,26 @@ static struct icssg_firmwares icssg_emac_firmwares[] = { } }; -static int prueth_emac_start(struct prueth *prueth, struct prueth_emac *emac) +static int prueth_start(struct rproc *rproc, const char *fw_name) +{ + int ret; + + ret = rproc_set_firmware(rproc, fw_name); + if (ret) + return ret; + return rproc_boot(rproc); +} + +static void prueth_shutdown(struct rproc *rproc) +{ + rproc_shutdown(rproc); +} + +static int prueth_emac_start(struct prueth *prueth) { struct icssg_firmwares *firmwares; struct device *dev = prueth->dev; - int slice, ret; + int ret, slice; if (prueth->is_switch_mode) firmwares = icssg_switch_firmwares; @@ -177,49 +192,126 @@ static int prueth_emac_start(struct prueth *prueth, struct prueth_emac *emac) else firmwares = icssg_emac_firmwares; - slice = prueth_emac_slice(emac); - if (slice < 0) { - netdev_err(emac->ndev, "invalid port\n"); - return -EINVAL; + for (slice = 0; slice < PRUETH_NUM_MACS; slice++) { + ret = prueth_start(prueth->pru[slice], firmwares[slice].pru); + if (ret) { + dev_err(dev, "failed to boot PRU%d: %d\n", slice, ret); + goto unwind_slices; + } + + ret = prueth_start(prueth->rtu[slice], firmwares[slice].rtu); + if (ret) { 
+ dev_err(dev, "failed to boot RTU%d: %d\n", slice, ret); + rproc_shutdown(prueth->pru[slice]); + goto unwind_slices; + } + + ret = prueth_start(prueth->txpru[slice], firmwares[slice].txpru); + if (ret) { + dev_err(dev, "failed to boot TX_PRU%d: %d\n", slice, ret); + rproc_shutdown(prueth->rtu[slice]); + rproc_shutdown(prueth->pru[slice]); + goto unwind_slices; + } } - ret = icssg_config(prueth, emac, slice); - if (ret) - return ret; + return 0; - ret = rproc_set_firmware(prueth->pru[slice], firmwares[slice].pru); - ret = rproc_boot(prueth->pru[slice]); - if (ret) { - dev_err(dev, "failed to boot PRU%d: %d\n", slice, ret); - return -EINVAL; +unwind_slices: + while (--slice >= 0) { + prueth_shutdown(prueth->txpru[slice]); + prueth_shutdown(prueth->rtu[slice]); + prueth_shutdown(prueth->pru[slice]); } - ret = rproc_set_firmware(prueth->rtu[slice], firmwares[slice].rtu); - ret = rproc_boot(prueth->rtu[slice]); - if (ret) { - dev_err(dev, "failed to boot RTU%d: %d\n", slice, ret); - goto halt_pru; + return ret; +} + +static void prueth_emac_stop(struct prueth *prueth) +{ + int slice; + + for (slice = 0; slice < PRUETH_NUM_MACS; slice++) { + prueth_shutdown(prueth->txpru[slice]); + prueth_shutdown(prueth->rtu[slice]); + prueth_shutdown(prueth->pru[slice]); } +} + +static int prueth_emac_common_start(struct prueth *prueth) +{ + struct prueth_emac *emac; + int ret = 0; + int slice; + + if (!prueth->emac[ICSS_SLICE0] && !prueth->emac[ICSS_SLICE1]) + return -EINVAL; + + /* clear SMEM and MSMC settings for all slices */ + memset_io(prueth->msmcram.va, 0, prueth->msmcram.size); + memset_io(prueth->shram.va, 0, ICSSG_CONFIG_OFFSET_SLICE1 * PRUETH_NUM_MACS); + + icssg_class_default(prueth->miig_rt, ICSS_SLICE0, 0, false); + icssg_class_default(prueth->miig_rt, ICSS_SLICE1, 0, false); + + if (prueth->is_switch_mode || prueth->is_hsr_offload_mode) + icssg_init_fw_offload_mode(prueth); + else + icssg_init_emac_mode(prueth); + + for (slice = 0; slice < PRUETH_NUM_MACS; slice++) { + 
emac = prueth->emac[slice]; + if (!emac) + continue; + ret = icssg_config(prueth, emac, slice); + if (ret) + goto disable_class; + } + + ret = prueth_emac_start(prueth); + if (ret) + goto disable_class; - ret = rproc_set_firmware(prueth->txpru[slice], firmwares[slice].txpru); - ret = rproc_boot(prueth->txpru[slice]); + emac = prueth->emac[ICSS_SLICE0] ? prueth->emac[ICSS_SLICE0] : + prueth->emac[ICSS_SLICE1]; + ret = icss_iep_init(emac->iep, &prueth_iep_clockops, + emac, IEP_DEFAULT_CYCLE_TIME_NS); if (ret) { - dev_err(dev, "failed to boot TX_PRU%d: %d\n", slice, ret); - goto halt_rtu; + dev_err(prueth->dev, "Failed to initialize IEP module\n"); + goto stop_pruss; } - emac->fw_running = 1; return 0; -halt_rtu: - rproc_shutdown(prueth->rtu[slice]); +stop_pruss: + prueth_emac_stop(prueth); -halt_pru: - rproc_shutdown(prueth->pru[slice]); +disable_class: + icssg_class_disable(prueth->miig_rt, ICSS_SLICE0); + icssg_class_disable(prueth->miig_rt, ICSS_SLICE1); return ret; } +static int prueth_emac_common_stop(struct prueth *prueth) +{ + struct prueth_emac *emac; + + if (!prueth->emac[ICSS_SLICE0] && !prueth->emac[ICSS_SLICE1]) + return -EINVAL; + + icssg_class_disable(prueth->miig_rt, ICSS_SLICE0); + icssg_class_disable(prueth->miig_rt, ICSS_SLICE1); + + prueth_emac_stop(prueth); + + emac = prueth->emac[ICSS_SLICE0] ? 
prueth->emac[ICSS_SLICE0] : + prueth->emac[ICSS_SLICE1]; + icss_iep_exit(emac->iep); + + return 0; +} + /* called back by PHY layer if there is change in link state of hw port*/ static void emac_adjust_link(struct net_device *ndev) { @@ -374,9 +466,6 @@ static void prueth_iep_settime(void *clockops_data, u64 ns) u32 cycletime; int timeout; - if (!emac->fw_running) - return; - sc_descp = emac->prueth->shram.va + TIMESYNC_FW_WC_SETCLOCK_DESC_OFFSET; cycletime = IEP_DEFAULT_CYCLE_TIME_NS; @@ -543,23 +632,17 @@ static int emac_ndo_open(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); int ret, i, num_data_chn = emac->tx_ch_num; + struct icssg_flow_cfg __iomem *flow_cfg; struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); struct device *dev = prueth->dev; int max_rx_flows; int rx_flow; - /* clear SMEM and MSMC settings for all slices */ - if (!prueth->emacs_initialized) { - memset_io(prueth->msmcram.va, 0, prueth->msmcram.size); - memset_io(prueth->shram.va, 0, ICSSG_CONFIG_OFFSET_SLICE1 * PRUETH_NUM_MACS); - } - /* set h/w MAC as user might have re-configured */ ether_addr_copy(emac->mac_addr, ndev->dev_addr); icssg_class_set_mac_addr(prueth->miig_rt, slice, emac->mac_addr); - icssg_class_default(prueth->miig_rt, slice, 0, false); icssg_ft1_set_mac_addr(prueth->miig_rt, slice, emac->mac_addr); /* Notify the stack of the actual queue counts. 
*/ @@ -597,18 +680,23 @@ static int emac_ndo_open(struct net_device *ndev) goto cleanup_napi; } - /* reset and start PRU firmware */ - ret = prueth_emac_start(prueth, emac); - if (ret) - goto free_rx_irq; + if (!prueth->emacs_initialized) { + ret = prueth_emac_common_start(prueth); + if (ret) + goto free_rx_irq; + } - icssg_mii_update_mtu(prueth->mii_rt, slice, ndev->max_mtu); + flow_cfg = emac->dram.va + ICSSG_CONFIG_OFFSET + PSI_L_REGULAR_FLOW_ID_BASE_OFFSET; + writew(emac->rx_flow_id_base, &flow_cfg->rx_base_flow); + ret = emac_fdb_flow_id_updated(emac); - if (!prueth->emacs_initialized) { - ret = icss_iep_init(emac->iep, &prueth_iep_clockops, - emac, IEP_DEFAULT_CYCLE_TIME_NS); + if (ret) { + netdev_err(ndev, "Failed to update Rx Flow ID %d", ret); + goto stop; } + icssg_mii_update_mtu(prueth->mii_rt, slice, ndev->max_mtu); + ret = request_threaded_irq(emac->tx_ts_irq, NULL, prueth_tx_ts_irq, IRQF_ONESHOT, dev_name(dev), emac); if (ret) @@ -653,7 +741,8 @@ reset_rx_chn: free_tx_ts_irq: free_irq(emac->tx_ts_irq, emac); stop: - prueth_emac_stop(emac); + if (!prueth->emacs_initialized) + prueth_emac_common_stop(prueth); free_rx_irq: free_irq(emac->rx_chns.irq[rx_flow], emac); cleanup_napi: @@ -689,8 +778,6 @@ static int emac_ndo_stop(struct net_device *ndev) if (ndev->phydev) phy_stop(ndev->phydev); - icssg_class_disable(prueth->miig_rt, prueth_emac_slice(emac)); - if (emac->prueth->is_hsr_offload_mode) __dev_mc_unsync(ndev, icssg_prueth_hsr_del_mcast); else @@ -728,11 +815,9 @@ static int emac_ndo_stop(struct net_device *ndev) /* Destroying the queued work in ndo_stop() */ cancel_delayed_work_sync(&emac->stats_work); - if (prueth->emacs_initialized == 1) - icss_iep_exit(emac->iep); - /* stop PRUs */ - prueth_emac_stop(emac); + if (prueth->emacs_initialized == 1) + prueth_emac_common_stop(prueth); free_irq(emac->tx_ts_irq, emac); @@ -1053,10 +1138,11 @@ static void prueth_offload_fwd_mark_update(struct prueth *prueth) } } -static void prueth_emac_restart(struct 
prueth *prueth) +static int prueth_emac_restart(struct prueth *prueth) { struct prueth_emac *emac0 = prueth->emac[PRUETH_MAC0]; struct prueth_emac *emac1 = prueth->emac[PRUETH_MAC1]; + int ret; /* Detach the net_device for both PRUeth ports*/ if (netif_running(emac0->ndev)) @@ -1065,36 +1151,46 @@ static void prueth_emac_restart(struct prueth *prueth) netif_device_detach(emac1->ndev); /* Disable both PRUeth ports */ - icssg_set_port_state(emac0, ICSSG_EMAC_PORT_DISABLE); - icssg_set_port_state(emac1, ICSSG_EMAC_PORT_DISABLE); + ret = icssg_set_port_state(emac0, ICSSG_EMAC_PORT_DISABLE); + ret |= icssg_set_port_state(emac1, ICSSG_EMAC_PORT_DISABLE); + if (ret) + return ret; /* Stop both pru cores for both PRUeth ports*/ - prueth_emac_stop(emac0); - prueth->emacs_initialized--; - prueth_emac_stop(emac1); - prueth->emacs_initialized--; + ret = prueth_emac_common_stop(prueth); + if (ret) { + dev_err(prueth->dev, "Failed to stop the firmwares"); + return ret; + } /* Start both pru cores for both PRUeth ports */ - prueth_emac_start(prueth, emac0); - prueth->emacs_initialized++; - prueth_emac_start(prueth, emac1); - prueth->emacs_initialized++; + ret = prueth_emac_common_start(prueth); + if (ret) { + dev_err(prueth->dev, "Failed to start the firmwares"); + return ret; + } /* Enable forwarding for both PRUeth ports */ - icssg_set_port_state(emac0, ICSSG_EMAC_PORT_FORWARD); - icssg_set_port_state(emac1, ICSSG_EMAC_PORT_FORWARD); + ret = icssg_set_port_state(emac0, ICSSG_EMAC_PORT_FORWARD); + ret |= icssg_set_port_state(emac1, ICSSG_EMAC_PORT_FORWARD); /* Attache net_device for both PRUeth ports */ netif_device_attach(emac0->ndev); netif_device_attach(emac1->ndev); + + return ret; } static void icssg_change_mode(struct prueth *prueth) { struct prueth_emac *emac; - int mac; + int mac, ret; - prueth_emac_restart(prueth); + ret = prueth_emac_restart(prueth); + if (ret) { + dev_err(prueth->dev, "Failed to restart the firmwares, aborting the process"); + return; + } for (mac = 
PRUETH_MAC0; mac < PRUETH_NUM_MACS; mac++) { emac = prueth->emac[mac]; @@ -1173,13 +1269,18 @@ static void prueth_netdevice_port_unlink(struct net_device *ndev) { struct prueth_emac *emac = netdev_priv(ndev); struct prueth *prueth = emac->prueth; + int ret; prueth->br_members &= ~BIT(emac->port_id); if (prueth->is_switch_mode) { prueth->is_switch_mode = false; emac->port_vlan = 0; - prueth_emac_restart(prueth); + ret = prueth_emac_restart(prueth); + if (ret) { + dev_err(prueth->dev, "Failed to restart the firmwares, aborting the process"); + return; + } } prueth_offload_fwd_mark_update(prueth); @@ -1228,6 +1329,7 @@ static void prueth_hsr_port_unlink(struct net_device *ndev) struct prueth *prueth = emac->prueth; struct prueth_emac *emac0; struct prueth_emac *emac1; + int ret; emac0 = prueth->emac[PRUETH_MAC0]; emac1 = prueth->emac[PRUETH_MAC1]; @@ -1238,7 +1340,11 @@ static void prueth_hsr_port_unlink(struct net_device *ndev) emac0->port_vlan = 0; emac1->port_vlan = 0; prueth->hsr_dev = NULL; - prueth_emac_restart(prueth); + ret = prueth_emac_restart(prueth); + if (ret) { + dev_err(prueth->dev, "Failed to restart the firmwares, aborting the process"); + return; + } netdev_dbg(ndev, "Disabling HSR Offload mode\n"); } } @@ -1413,13 +1519,10 @@ static int prueth_probe(struct platform_device *pdev) prueth->pa_stats = NULL; } - if (eth0_node) { + if (eth0_node || eth1_node) { ret = prueth_get_cores(prueth, ICSS_SLICE0, false); if (ret) goto put_cores; - } - - if (eth1_node) { ret = prueth_get_cores(prueth, ICSS_SLICE1, false); if (ret) goto put_cores; @@ -1618,14 +1721,12 @@ put_pruss: pruss_put(prueth->pruss); put_cores: - if (eth1_node) { - prueth_put_cores(prueth, ICSS_SLICE1); - of_node_put(eth1_node); - } - - if (eth0_node) { + if (eth0_node || eth1_node) { prueth_put_cores(prueth, ICSS_SLICE0); of_node_put(eth0_node); + + prueth_put_cores(prueth, ICSS_SLICE1); + of_node_put(eth1_node); } return ret; diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.h 
b/drivers/net/ethernet/ti/icssg/icssg_prueth.h index f5c1d473e9f9..5473315ea204 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.h +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.h @@ -140,7 +140,6 @@ struct prueth_rx_chn { /* data for each emac port */ struct prueth_emac { bool is_sr1; - bool fw_running; struct prueth *prueth; struct net_device *ndev; u8 mac_addr[6]; @@ -361,6 +360,8 @@ int icssg_set_port_state(struct prueth_emac *emac, enum icssg_port_state_cmd state); void icssg_config_set_speed(struct prueth_emac *emac); void icssg_config_half_duplex(struct prueth_emac *emac); +void icssg_init_emac_mode(struct prueth *prueth); +void icssg_init_fw_offload_mode(struct prueth *prueth); /* Buffer queue helpers */ int icssg_queue_pop(struct prueth *prueth, u8 queue); @@ -377,6 +378,7 @@ void icssg_vtbl_modify(struct prueth_emac *emac, u8 vid, u8 port_mask, u8 untag_mask, bool add); u16 icssg_get_pvid(struct prueth_emac *emac); void icssg_set_pvid(struct prueth *prueth, u8 vid, u8 port); +int emac_fdb_flow_id_updated(struct prueth_emac *emac); #define prueth_napi_to_tx_chn(pnapi) \ container_of(pnapi, struct prueth_tx_chn, napi_tx) @@ -407,7 +409,6 @@ void emac_rx_timestamp(struct prueth_emac *emac, struct sk_buff *skb, u32 *psdata); enum netdev_tx icssg_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev); irqreturn_t prueth_rx_irq(int irq, void *dev_id); -void prueth_emac_stop(struct prueth_emac *emac); void prueth_cleanup_tx_ts(struct prueth_emac *emac); int icssg_napi_rx_poll(struct napi_struct *napi_rx, int budget); int prueth_prepare_rx_chan(struct prueth_emac *emac, diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c index 5024f0647a0d..3dc86397c367 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth_sr1.c @@ -440,7 +440,6 @@ static int prueth_emac_start(struct prueth *prueth, struct prueth_emac *emac) goto halt_pru; } - 
emac->fw_running = 1; return 0; halt_pru: @@ -449,6 +448,29 @@ halt_pru: return ret; } +static void prueth_emac_stop(struct prueth_emac *emac) +{ + struct prueth *prueth = emac->prueth; + int slice; + + switch (emac->port_id) { + case PRUETH_PORT_MII0: + slice = ICSS_SLICE0; + break; + case PRUETH_PORT_MII1: + slice = ICSS_SLICE1; + break; + default: + netdev_err(emac->ndev, "invalid port\n"); + return; + } + + if (!emac->is_sr1) + rproc_shutdown(prueth->txpru[slice]); + rproc_shutdown(prueth->rtu[slice]); + rproc_shutdown(prueth->pru[slice]); +} + /** * emac_ndo_open - EMAC device open * @ndev: network adapter device -- 2.51.0 From 9b115361248dc6cce182a2dc030c1c70b0a9639e Mon Sep 17 00:00:00 2001 From: Meghana Malladi Date: Mon, 23 Dec 2024 20:45:50 +0530 Subject: [PATCH 04/16] net: ti: icssg-prueth: Fix clearing of IEP_CMP_CFG registers during iep_init When ICSSG interfaces are brought down and brought up again, the pru cores are shut down and booted again, flushing out all the memories and start again in a clean state. Hence it is expected that the IEP_CMP_CFG register needs to be flushed during iep_init() to ensure that the existing residual configuration doesn't cause any unusual behavior. If the register is not cleared, existing IEP_CMP_CFG set for CMP1 will result in SYNC0_OUT signal based on the SYNC_OUT register values. After bringing the interface up, calling PPS enable doesn't work as the driver believes PPS is already enabled, (iep->pps_enabled is not cleared during interface bring down) and driver will just return true even though there is no signal. Fix this by disabling pps and perout. Fixes: c1e0230eeaab ("net: ti: icss-iep: Add IEP driver") Signed-off-by: Meghana Malladi Reviewed-by: Roger Quadros Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/icssg/icss_iep.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index 5d6d1cf78e93..768578c0d958 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -215,6 +215,9 @@ static void icss_iep_enable_shadow_mode(struct icss_iep *iep) for (cmp = IEP_MIN_CMP; cmp < IEP_MAX_CMP; cmp++) { regmap_update_bits(iep->map, ICSS_IEP_CMP_STAT_REG, IEP_CMP_STATUS(cmp), IEP_CMP_STATUS(cmp)); + + regmap_update_bits(iep->map, ICSS_IEP_CMP_CFG_REG, + IEP_CMP_CFG_CMP_EN(cmp), 0); } /* enable reset counter on CMP0 event */ @@ -780,6 +783,11 @@ int icss_iep_exit(struct icss_iep *iep) } icss_iep_disable(iep); + if (iep->pps_enabled) + icss_iep_pps_enable(iep, false); + else if (iep->perout_enabled) + icss_iep_perout_enable(iep, NULL, false); + return 0; } EXPORT_SYMBOL_GPL(icss_iep_exit); -- 2.51.0 From 45d339fefaa3dcd237038769e0d34584fb867390 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 19 Dec 2024 14:23:36 +0200 Subject: [PATCH 05/16] RDMA/mlx5: Enable multiplane mode only when it is supported Driver queries vport_cxt.num_plane and enables multiplane when it is greater than 0, but some old FWs (versions from x.40.1000 till x.42.1000), report vport_cxt.num_plane = 1 unexpectedly. Fix it by querying num_plane only when HCA_CAP2.multiplane bit is set. 
Fixes: 2a5db20fa532 ("RDMA/mlx5: Add support to multi-plane device and port") Link: https://patch.msgid.link/r/1ef901acdf564716fcf550453cf5e94f343777ec.1734610916.git.leon@kernel.org Cc: stable@vger.kernel.org Reported-by: Francesco Poli Closes: https://lore.kernel.org/all/nvs4i2v7o6vn6zhmtq4sgazy2hu5kiulukxcntdelggmznnl7h@so3oul6uwgbl/ Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky Reviewed-by: Michal Swiatkowski Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index c2314797afc9..f5b59d02f4d3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2839,7 +2839,7 @@ static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane) int err; *num_plane = 0; - if (!MLX5_CAP_GEN(mdev, ib_virt)) + if (!MLX5_CAP_GEN(mdev, ib_virt) || !MLX5_CAP_GEN_2(mdev, multiplane)) return 0; err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx); diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4fbbcf35498b..48d47181c7cd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -2119,7 +2119,9 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 migration_in_chunks[0x1]; u8 reserved_at_d1[0x1]; u8 sf_eq_usage[0x1]; - u8 reserved_at_d3[0xd]; + u8 reserved_at_d3[0x5]; + u8 multiplane[0x1]; + u8 reserved_at_d9[0x7]; u8 cross_vhca_object_to_object_supported[0x20]; -- 2.51.0 From ed123c948d06688d10f3b10a7bce1d6fbfd1ed07 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 3 Jan 2025 09:29:09 -0700 Subject: [PATCH 06/16] io_uring/kbuf: use pre-committed buffer address for non-pollable file For non-pollable files, buffer ring consumption will commit upfront. This is fine, but io_ring_buffer_select() will return the address of the buffer after having committed it. 
For incrementally consumed buffers, this is incorrect as it will modify the buffer address. Store the pre-committed value and return that. If that isn't done, then the initial part of the buffer is not used and the application will correctly assume the content arrived at the start of the userspace buffer, but the kernel will have put it later in the buffer. Or it can cause a spurious -EFAULT returned in the CQE, depending on the buffer size. As bounds are suitably checked for doing the actual IO, no adverse side effects are possible - it's just a data misplacement within the existing buffer. Reported-by: Gwendal Fernet Cc: stable@vger.kernel.org Fixes: ae98dbf43d75 ("io_uring/kbuf: add support for incremental buffer consumption") Signed-off-by: Jens Axboe --- io_uring/kbuf.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index d407576ddfb7..eec5eb7de843 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -139,6 +139,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_uring_buf_ring *br = bl->buf_ring; __u16 tail, head = bl->head; struct io_uring_buf *buf; + void __user *ret; tail = smp_load_acquire(&br->tail); if (unlikely(tail == head)) @@ -153,6 +154,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; req->buf_list = bl; req->buf_index = buf->bid; + ret = u64_to_user_ptr(buf->addr); if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { /* @@ -168,7 +170,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, io_kbuf_commit(req, bl, *len, 1); req->buf_list = NULL; } - return u64_to_user_ptr(buf->addr); + return ret; } void __user *io_buffer_select(struct io_kiocb *req, size_t *len, -- 2.51.0 From 385443057f475e775fe1c66e77d4be9727f40973 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 3 Jan 2025 19:20:23 +0100 
Subject: [PATCH 07/16] kbuild: pacman-pkg: provide versioned linux-api-headers package MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The Arch Linux glibc package contains a versioned dependency on "linux-api-headers". If the linux-api-headers package provided by pacman-pkg does not specify an explicit version this dependency is not satisfied. Fix the dependency by providing an explicit version. Fixes: c8578539deba ("kbuild: add script and target to generate pacman package") Signed-off-by: Thomas Weißschuh Reviewed-by: Nathan Chancellor Signed-off-by: Masahiro Yamada --- scripts/package/PKGBUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD index f83493838cf9..dca706617adc 100644 --- a/scripts/package/PKGBUILD +++ b/scripts/package/PKGBUILD @@ -103,7 +103,7 @@ _package-headers() { _package-api-headers() { pkgdesc="Kernel headers sanitized for use in userspace" - provides=(linux-api-headers) + provides=(linux-api-headers="${pkgver}") conflicts=(linux-api-headers) _prologue -- 2.51.0 From 9d89551994a430b50c4fffcb1e617a057fa76e20 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 5 Jan 2025 14:13:40 -0800 Subject: [PATCH 08/16] Linux 6.13-rc6 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 48e89108aa58..7904d5d88088 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc6 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From cd6313beaeaea0b2e6d428afef7a86a986b50abe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Jan 2025 06:10:24 -0800 Subject: [PATCH 09/16] Revert "vmstat: disable vmstat_work on vmstat_cpu_down_prep()" This reverts commit adcfb264c3ed51fbbf5068ddf10d309a63683868. 
It turns out this just causes a different warning splat instead that seems to be much easier to trigger, so let's revert ASAP. Reported-and-bisected-by: Borislav Petkov Tested-by: Breno Leitao Reported-by: Alexander Gordeev Link: https://lore.kernel.org/all/20250106131817.GAZ3vYGVr3-hWFFPLj@fat_crate.local/ Cc: Koichiro Den Cc: Sebastian Andrzej Siewior Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmstat.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/vmstat.c b/mm/vmstat.c index 0889b75cef14..4d016314a56c 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -2148,14 +2148,13 @@ static int vmstat_cpu_online(unsigned int cpu) if (!node_state(cpu_to_node(cpu), N_CPU)) { node_set_state(cpu_to_node(cpu), N_CPU); } - enable_delayed_work(&per_cpu(vmstat_work, cpu)); return 0; } static int vmstat_cpu_down_prep(unsigned int cpu) { - disable_delayed_work_sync(&per_cpu(vmstat_work, cpu)); + cancel_delayed_work_sync(&per_cpu(vmstat_work, cpu)); return 0; } -- 2.51.0 From 76d5d4c53e68719c018691b19a961e78524a155c Mon Sep 17 00:00:00 2001 From: Guo Weikang Date: Fri, 27 Dec 2024 17:23:10 +0800 Subject: [PATCH 10/16] mm/kmemleak: fix percpu memory leak detection failure kmemleak_alloc_percpu gives an incorrect min_count parameter, causing percpu memory to be considered a gray object. 
Link: https://lkml.kernel.org/r/20241227092311.3572500-1-guoweikang.kernel@gmail.com Fixes: 8c8685928910 ("mm/kmemleak: use IS_ERR_PCPU() for pointer in the percpu address space") Signed-off-by: Guo Weikang Acked-by: Uros Bizjak Acked-by: Catalin Marinas Cc: Guo Weikang Cc: Signed-off-by: Andrew Morton --- mm/kmemleak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 737af23f4f4e..820ba3b5cbfc 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1093,7 +1093,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size, pr_debug("%s(0x%px, %zu)\n", __func__, ptr, size); if (kmemleak_enabled && ptr && !IS_ERR_PCPU(ptr)) - create_object_percpu((__force unsigned long)ptr, size, 0, gfp); + create_object_percpu((__force unsigned long)ptr, size, 1, gfp); } EXPORT_SYMBOL_GPL(kmemleak_alloc_percpu); -- 2.51.0 From 2bff77c665edd854a09c479effe75b3b0e3fedef Mon Sep 17 00:00:00 2001 From: Luca Ceresoli Date: Mon, 30 Dec 2024 22:55:10 +0100 Subject: [PATCH 11/16] scripts/decode_stacktrace.sh: fix decoding of lines with an additional info Since commit bdf8eafbf7f5 ("arm64: stacktrace: report source of unwind data") a stack trace line can contain an additional info field that was not present before, in the form of one or more letters in parentheses. E.g.: [ 504.517915] led_sysfs_enable+0x54/0x80 (P) ^^^ When this is present, decode_stacktrace decodes the line incorrectly: [ 504.517915] led_sysfs_enable+0x54/0x80 P Extend parsing to decode it correctly: [ 504.517915] led_sysfs_enable (drivers/leds/led-core.c:455 (discriminator 7)) (P) The regex to match such lines assumes the info can be extended in the future to other uppercase characters, and will need to be extended if other characters are used. Using a much more generic regex might incur false positives, so this looked like a good tradeoff.
Link: https://lkml.kernel.org/r/20241230-decode_stacktrace-fix-info-v1-1-984910659173@bootlin.com Fixes: bdf8eafbf7f5 ("arm64: stacktrace: report source of unwind data") Signed-off-by: Luca Ceresoli Cc: Catalin Marinas Cc: Mark Brown Cc: Mark Rutland Cc: Miroslav Benes Cc: Puranjay Mohan Cc: Thomas Petazzoni Signed-off-by: Andrew Morton --- scripts/decode_stacktrace.sh | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh index 46fa18b80fc1..17abc4e7a985 100755 --- a/scripts/decode_stacktrace.sh +++ b/scripts/decode_stacktrace.sh @@ -286,6 +286,18 @@ handle_line() { last=$(( $last - 1 )) fi + # Extract info after the symbol if present. E.g.: + # func_name+0x54/0x80 (P) + # ^^^ + # The regex assumes only uppercase letters will be used. To be + # extended if needed. + local info_str="" + if [[ ${words[$last]} =~ \([A-Z]*\) ]]; then + info_str=${words[$last]} + unset words[$last] + last=$(( $last - 1 )) + fi + if [[ ${words[$last]} =~ \[([^]]+)\] ]]; then module=${words[$last]} # some traces format is "(%pS)", which like "(foo+0x0/0x1 [bar])" @@ -313,9 +325,9 @@ handle_line() { # Add up the line number to the symbol if [[ -z ${module} ]] then - echo "${words[@]}" "$symbol" + echo "${words[@]}" "$symbol ${info_str}" else - echo "${words[@]}" "$symbol $module" + echo "${words[@]}" "$symbol $module ${info_str}" fi } -- 2.51.0 From 264a88cafdbd0f4579af903145ac605d030f3f66 Mon Sep 17 00:00:00 2001 From: Honggyu Kim Date: Fri, 27 Dec 2024 18:57:37 +0900 Subject: [PATCH 12/16] mm/mempolicy: count MPOL_WEIGHTED_INTERLEAVE to "interleave_hit" Commit fa3bea4e1f82 introduced MPOL_WEIGHTED_INTERLEAVE but it missed adding its counter to "interleave_hit" of numastat, which is located at /sys/devices/system/node/nodeN/ directory. It'd be better to add weighted interleaving counter info to the existing "interleave_hit" instead of introducing a new counter "weighted_interleave_hit".
Link: https://lkml.kernel.org/r/20241227095737.645-1-honggyu.kim@sk.com Fixes: fa3bea4e1f82 ("mm/mempolicy: introduce MPOL_WEIGHTED_INTERLEAVE for weighted interleaving") Signed-off-by: Honggyu Kim Reviewed-by: Gregory Price Reviewed-by: Hyeonggon Yoo Tested-by: Yunjeong Mun Cc: Andi Kleen Signed-off-by: Andrew Morton --- mm/mempolicy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 04f35659717a..162407fbf2bc 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2268,7 +2268,8 @@ struct page *alloc_pages_mpol_noprof(gfp_t gfp, unsigned int order, page = __alloc_pages_noprof(gfp, order, nid, nodemask); - if (unlikely(pol->mode == MPOL_INTERLEAVE) && page) { + if (unlikely(pol->mode == MPOL_INTERLEAVE || + pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) { /* skip NUMA_INTERLEAVE_HIT update if numa stats is disabled */ if (static_branch_likely(&vm_numa_stat_key) && page_to_nid(page) == nid) { -- 2.51.0 From 4bbb6df62c54e6a2c1fcce4908df768f0cfa1e91 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 27 Dec 2024 14:22:20 -0800 Subject: [PATCH 13/16] tools: fix atomic_set() definition to set the value correctly Currently vma test is failing because of the new vma_assert_attached() assertion. The check is failing because previous refcount_set() inside vma_mark_attached() is a NoOp. Fix the definition of atomic_set() to correctly set the value of the atomic. Link: https://lkml.kernel.org/r/20241227222220.1726384-1-surenb@google.com Fixes: 9325b8b5a1cb ("tools: add skeleton code for userland testing of VMA logic") Signed-off-by: Suren Baghdasaryan Reviewed-by: Lorenzo Stoakes Cc: Jann Horn Cc: Liam R. 
Howlett Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton --- tools/testing/shared/linux/maple_tree.h | 2 +- tools/testing/vma/linux/atomic.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index 06c89bdcc515..f67d47d32857 100644 --- a/tools/testing/shared/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h @@ -2,6 +2,6 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #include "../../../../include/linux/maple_tree.h" diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h index e01f66f98982..3e1b6adc027b 100644 --- a/tools/testing/vma/linux/atomic.h +++ b/tools/testing/vma/linux/atomic.h @@ -6,7 +6,7 @@ #define atomic_t int32_t #define atomic_inc(x) uatomic_inc(x) #define atomic_read(x) uatomic_read(x) -#define atomic_set(x, y) do {} while (0) +#define atomic_set(x, y) uatomic_set(x, y) #define U8_MAX UCHAR_MAX #endif /* _LINUX_ATOMIC_H */ -- 2.51.0 From f505e6c91e7a22d10316665a86d79f84d9f0ba76 Mon Sep 17 00:00:00 2001 From: Marco Nelissen Date: Thu, 2 Jan 2025 11:04:11 -0800 Subject: [PATCH 14/16] filemap: avoid truncating 64-bit offset to 32 bits On 32-bit kernels, folio_seek_hole_data() was inadvertently truncating a 64-bit value to 32 bits, leading to a possible infinite loop when writing to an xfs filesystem. 
Link: https://lkml.kernel.org/r/20250102190540.1356838-1-marco.nelissen@gmail.com Fixes: 54fa39ac2e00 ("iomap: use mapping_seek_hole_data") Signed-off-by: Marco Nelissen Cc: Matthew Wilcox (Oracle) Cc: Signed-off-by: Andrew Morton --- mm/filemap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/filemap.c b/mm/filemap.c index 33b60d448fca..118fa1e0bafe 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2996,7 +2996,7 @@ static inline loff_t folio_seek_hole_data(struct xa_state *xas, if (ops->is_partially_uptodate(folio, offset, bsz) == seek_data) break; - start = (start + bsz) & ~(bsz - 1); + start = (start + bsz) & ~((u64)bsz - 1); offset += bsz; } while (offset < folio_size(folio)); unlock: -- 2.51.0 From 59f59108475e8fadd5f9fc9a5d79563203df40f1 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Jan 2025 07:56:31 +0100 Subject: [PATCH 15/16] x86/execmem: fix ROX cache usage in Xen PV guests MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The recently introduced ROX cache for modules is assuming large page support in 64-bit mode without testing the related feature bit. This results in breakage when running as a Xen PV guest, as in this mode large pages are not supported. Fix that by testing the X86_FEATURE_PSE capability when deciding whether to enable the ROX cache. Link: https://lkml.kernel.org/r/20250103065631.26459-1-jgross@suse.com Fixes: 2e45474ab14f ("execmem: add support for cache of large ROX pages") Signed-off-by: Juergen Gross Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Cc: Luis Chamberlain Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Mike Rapoport (Microsoft) Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- arch/x86/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c6d29f283001..62aa4d66a032 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -1080,7 +1080,8 @@ struct execmem_info __init *execmem_arch_setup(void) start = MODULES_VADDR + offset; - if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX)) { + if (IS_ENABLED(CONFIG_ARCH_HAS_EXECMEM_ROX) && + cpu_feature_enabled(X86_FEATURE_PSE)) { pgprot = PAGE_KERNEL_ROX; flags = EXECMEM_KASAN_SHADOW | EXECMEM_ROX_CACHE; } else { -- 2.51.0 From 4ce718f39776b5e162ea6b8010da7a6f02b13dc0 Mon Sep 17 00:00:00 2001 From: Stefan Roesch Date: Fri, 3 Jan 2025 17:20:37 -0800 Subject: [PATCH 16/16] mm: fix div by zero in bdi_ratio_from_pages During testing it has been detected that it is possible to get a div by zero error in bdi_set_min_bytes. The error is caused by the function bdi_ratio_from_pages(). bdi_ratio_from_pages() calls global_dirty_limits. If the dirty threshold is 0, the div by zero is raised. This can happen if the root user is setting: echo 0 > /proc/sys/vm/dirty_ratio The following is a test case: echo 0 > /proc/sys/vm/dirty_ratio cd /sys/class/bdi/ echo 1 > strict_limit echo 8192 > min_bytes ==> error is raised. The problem is addressed by returning -EINVAL if dirty_ratio or dirty_bytes is set to 0.
[shr@devkernel.io: check for -EINVAL in bdi_set_min_bytes() and bdi_set_max_bytes()] Link: https://lkml.kernel.org/r/20250108014723.166637-1-shr@devkernel.io [shr@devkernel.io: v3] Link: https://lkml.kernel.org/r/20250109063411.6591-1-shr@devkernel.io Link: https://lkml.kernel.org/r/20250104012037.159386-1-shr@devkernel.io Signed-off-by: Stefan Roesch Reported-by: cheung wall Closes: https://lore.kernel.org/linux-mm/87pll35yd0.fsf@devkernel.io/T/#t Acked-by: David Hildenbrand Cc: Matthew Wilcox (Oracle) Cc: Qiang Zhang Signed-off-by: Andrew Morton --- mm/page-writeback.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d213ead95675..d9861e42b2bd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -692,6 +692,8 @@ static unsigned long bdi_ratio_from_pages(unsigned long pages) unsigned long ratio; global_dirty_limits(&background_thresh, &dirty_thresh); + if (!dirty_thresh) + return -EINVAL; ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh); return ratio; @@ -790,13 +792,15 @@ int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes) { int ret; unsigned long pages = min_bytes >> PAGE_SHIFT; - unsigned long min_ratio; + long min_ratio; ret = bdi_check_pages_limit(pages); if (ret) return ret; min_ratio = bdi_ratio_from_pages(pages); + if (min_ratio < 0) + return min_ratio; return __bdi_set_min_ratio(bdi, min_ratio); } @@ -809,13 +813,15 @@ int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes) { int ret; unsigned long pages = max_bytes >> PAGE_SHIFT; - unsigned long max_ratio; + long max_ratio; ret = bdi_check_pages_limit(pages); if (ret) return ret; max_ratio = bdi_ratio_from_pages(pages); + if (max_ratio < 0) + return max_ratio; return __bdi_set_max_ratio(bdi, max_ratio); } -- 2.51.0