From 768776dd4efc681cdca33a79e29bb508d6de9bc0 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Mon, 16 Dec 2024 16:16:40 +0100 Subject: [PATCH 01/16] i2c: imx: fix missing stop condition in single-master mode A regression was introduced with the implementation of single-master mode, preventing proper stop conditions from being generated. Devices that require a valid stop condition, such as EEPROMs, fail to function correctly as a result. The issue only affects devices with the single-master property enabled. This commit resolves the issue by re-enabling I2C bus busy bit (IBB) polling for single-master mode when generating a stop condition. The fix further ensures that the i2c_imx->stopped flag is cleared at the start of each transfer, allowing the stop condition to be correctly generated in i2c_imx_stop(). According to the reference manual (IMX8MMRM, Rev. 2, 09/2019, page 5270), polling the IBB bit to determine if the bus is free is only necessary in multi-master mode. Consequently, the IBB bit is not polled for the start condition in single-master mode. Fixes: 6692694aca86 ("i2c: imx: do not poll for bus busy in single master mode") Signed-off-by: Stefan Eichenberger Reviewed-by: Frank Li Reviewed-by: Francesco Dolcini Link: https://lore.kernel.org/r/20241216151829.74056-1-eichest@gmail.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index f751d231ded8..488ee3511314 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -532,22 +532,20 @@ static void i2c_imx_dma_free(struct imx_i2c_struct *i2c_imx) static int i2c_imx_bus_busy(struct imx_i2c_struct *i2c_imx, int for_busy, bool atomic) { + bool multi_master = i2c_imx->multi_master; unsigned long orig_jiffies = jiffies; unsigned int temp; - if (!i2c_imx->multi_master) - return 0; - while (1) { temp = imx_i2c_read_reg(i2c_imx, IMX_I2C_I2SR); /* check for arbitration lost */ - if (temp & I2SR_IAL) { + if (multi_master && (temp & I2SR_IAL)) { i2c_imx_clear_irq(i2c_imx, I2SR_IAL); return -EAGAIN; } - if (for_busy && (temp & I2SR_IBB)) { + if (for_busy && (!multi_master || (temp & I2SR_IBB))) { i2c_imx->stopped = 0; break; } -- 2.50.1 From e0cec363197e41af870613e8e17b30bf0e3d41b5 Mon Sep 17 00:00:00 2001 From: Carlos Song Date: Wed, 18 Dec 2024 12:42:38 +0800 Subject: [PATCH 02/16] i2c: imx: add imx7d compatible string for applying erratum ERR007805 Compatible string "fsl,imx7d-i2c" is not exited at i2c-imx driver compatible string table, at the result, "fsl,imx21-i2c" will be matched, but it will cause erratum ERR007805 not be applied in fact. So Add "fsl,imx7d-i2c" compatible string in i2c-imx driver to apply the erratum ERR007805(https://www.nxp.com/docs/en/errata/IMX7DS_3N09P.pdf). " ERR007805 I2C: When the I2C clock speed is configured for 400 kHz, the SCL low period violates the I2C spec of 1.3 uS min Description: When the I2C module is programmed to operate at the maximum clock speed of 400 kHz (as defined by the I2C spec), the SCL clock low period violates the I2C spec of 1.3 uS min. The user must reduce the clock speed to obtain the SCL low time to meet the 1.3us I2C minimum required. This behavior means the SoC is not compliant to the I2C spec at 400kHz. Workaround: To meet the clock low period requirement in fast speed mode, SCL must be configured to 384KHz or less. " "fsl,imx7d-i2c" already is documented in binding doc. This erratum fix has been included in imx6_i2c_hwdata and it is the same in all I.MX6/7/8, so just reuse it. Fixes: 39c025721d70 ("i2c: imx: Implement errata ERR007805 or e7805 bus frequency limit") Cc: stable@vger.kernel.org # v5.18+ Signed-off-by: Carlos Song Signed-off-by: Haibo Chen Reviewed-by: Frank Li Fixes: 39c025721d70 ("i2c: imx: Implement errata ERR007805 or e7805 bus frequency limit") Acked-by: Oleksij Rempel Link: https://lore.kernel.org/r/20241218044238.143414-1-carlos.song@nxp.com Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-imx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 488ee3511314..5c9a8dfbc4a0 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -335,6 +335,7 @@ static const struct of_device_id i2c_imx_dt_ids[] = { { .compatible = "fsl,imx6sll-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,imx6sx-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,imx6ul-i2c", .data = &imx6_i2c_hwdata, }, + { .compatible = "fsl,imx7d-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,imx7s-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,imx8mm-i2c", .data = &imx6_i2c_hwdata, }, { .compatible = "fsl,imx8mn-i2c", .data = &imx6_i2c_hwdata, }, -- 2.50.1 From 9a8f9320d67b27ddd7f1ee88d91820197a0e908f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 18 Dec 2024 12:07:40 +0000 Subject: [PATCH 03/16] i2c: microchip-core: actually use repeated sends At present, where repeated sends are intended to be used, the i2c-microchip-core driver sends a stop followed by a start. Lots of i2c devices must not malfunction in the face of this behaviour, because the driver has operated like this for years! Try to keep track of whether or not a repeated send is required, and suppress sending a stop in these cases. CC: stable@vger.kernel.org Fixes: 64a6f1c4987e ("i2c: add support for microchip fpga i2c controllers") Signed-off-by: Conor Dooley Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241218-football-composure-e56df2461461@spud Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-microchip-corei2c.c | 120 ++++++++++++++++----- 1 file changed, 94 insertions(+), 26 deletions(-) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index d1543e7d8380..6a124e903c66 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -93,27 +93,35 @@ * @base: pointer to register struct * @dev: device reference * @i2c_clk: clock reference for i2c input clock + * @msg_queue: pointer to the messages requiring sending * @buf: pointer to msg buffer for easier use * @msg_complete: xfer completion object * @adapter: core i2c abstraction * @msg_err: error code for completed message * @bus_clk_rate: current i2c bus clock rate * @isr_status: cached copy of local ISR status + * @total_num: total number of messages to be sent/received + * @current_num: index of the current message being sent/received * @msg_len: number of bytes transferred in msg * @addr: address of the current slave + * @restart_needed: whether or not a repeated start is required after current message */ struct mchp_corei2c_dev { void __iomem *base; struct device *dev; struct clk *i2c_clk; + struct i2c_msg *msg_queue; u8 *buf; struct completion msg_complete; struct i2c_adapter adapter; int msg_err; + int total_num; + int current_num; u32 bus_clk_rate; u32 isr_status; u16 msg_len; u8 addr; + bool restart_needed; }; static void mchp_corei2c_core_disable(struct mchp_corei2c_dev *idev) @@ -222,6 +230,47 @@ static int mchp_corei2c_fill_tx(struct mchp_corei2c_dev *idev) return 0; } +static void mchp_corei2c_next_msg(struct mchp_corei2c_dev *idev) +{ + struct i2c_msg *this_msg; + u8 ctrl; + + if (idev->current_num >= idev->total_num) { + complete(&idev->msg_complete); + return; + } + + /* + * If there's been an error, the isr needs to return control + * to the "main" part of the driver, so as not to keep sending + * messages once it completes and clears the SI bit. + */ + if (idev->msg_err) { + complete(&idev->msg_complete); + return; + } + + this_msg = idev->msg_queue++; + + if (idev->current_num < (idev->total_num - 1)) { + struct i2c_msg *next_msg = idev->msg_queue; + + idev->restart_needed = next_msg->flags & I2C_M_RD; + } else { + idev->restart_needed = false; + } + + idev->addr = i2c_8bit_addr_from_msg(this_msg); + idev->msg_len = this_msg->len; + idev->buf = this_msg->buf; + + ctrl = readb(idev->base + CORE_I2C_CTRL); + ctrl |= CTRL_STA; + writeb(ctrl, idev->base + CORE_I2C_CTRL); + + idev->current_num++; +} + static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) { u32 status = idev->isr_status; @@ -247,10 +296,14 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) break; case STATUS_M_SLAW_ACK: case STATUS_M_TX_DATA_ACK: - if (idev->msg_len > 0) + if (idev->msg_len > 0) { mchp_corei2c_fill_tx(idev); - else - last_byte = true; + } else { + if (idev->restart_needed) + finished = true; + else + last_byte = true; + } break; case STATUS_M_TX_DATA_NACK: case STATUS_M_SLAR_NACK: @@ -287,7 +340,7 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) mchp_corei2c_stop(idev); if (last_byte || finished) - complete(&idev->msg_complete); + mchp_corei2c_next_msg(idev); return IRQ_HANDLED; } @@ -311,21 +364,48 @@ static irqreturn_t mchp_corei2c_isr(int irq, void *_dev) return ret; } -static int mchp_corei2c_xfer_msg(struct mchp_corei2c_dev *idev, - struct i2c_msg *msg) +static int mchp_corei2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, + int num) { - u8 ctrl; + struct mchp_corei2c_dev *idev = i2c_get_adapdata(adap); + struct i2c_msg *this_msg = msgs; unsigned long time_left; + u8 ctrl; + + mchp_corei2c_core_enable(idev); + + /* + * The isr controls the flow of a transfer, this info needs to be saved + * to a location that it can access the queue information from. + */ + idev->restart_needed = false; + idev->msg_queue = msgs; + idev->total_num = num; + idev->current_num = 0; - idev->addr = i2c_8bit_addr_from_msg(msg); - idev->msg_len = msg->len; - idev->buf = msg->buf; + /* + * But the first entry to the isr is triggered by the start in this + * function, so the first message needs to be "dequeued". + */ + idev->addr = i2c_8bit_addr_from_msg(this_msg); + idev->msg_len = this_msg->len; + idev->buf = this_msg->buf; idev->msg_err = 0; - reinit_completion(&idev->msg_complete); + if (idev->total_num > 1) { + struct i2c_msg *next_msg = msgs + 1; - mchp_corei2c_core_enable(idev); + idev->restart_needed = next_msg->flags & I2C_M_RD; + } + idev->current_num++; + idev->msg_queue++; + + reinit_completion(&idev->msg_complete); + + /* + * Send the first start to pass control to the isr + */ ctrl = readb(idev->base + CORE_I2C_CTRL); ctrl |= CTRL_STA; writeb(ctrl, idev->base + CORE_I2C_CTRL); @@ -335,20 +415,8 @@ static int mchp_corei2c_xfer_msg(struct mchp_corei2c_dev *idev, if (!time_left) return -ETIMEDOUT; - return idev->msg_err; -} - -static int mchp_corei2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, - int num) -{ - struct mchp_corei2c_dev *idev = i2c_get_adapdata(adap); - int i, ret; - - for (i = 0; i < num; i++) { - ret = mchp_corei2c_xfer_msg(idev, msgs++); - if (ret) - return ret; - } + if (idev->msg_err) + return idev->msg_err; return num; } -- 2.50.1 From 49e1f0fd0d4cb03a16b8526c4e683e1958f71490 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Wed, 18 Dec 2024 12:07:42 +0000 Subject: [PATCH 04/16] i2c: microchip-core: fix "ghost" detections Running i2c-detect currently produces an output akin to: 0 1 2 3 4 5 6 7 8 9 a b c d e f 00: 08 -- 0a -- 0c -- 0e -- 10: 10 -- 12 -- 14 -- 16 -- UU 19 -- 1b -- 1d -- 1f 20: -- 21 -- 23 -- 25 -- 27 -- 29 -- 2b -- 2d -- 2f 30: -- -- -- -- -- -- -- -- 38 -- 3a -- 3c -- 3e -- 40: 40 -- 42 -- 44 -- 46 -- 48 -- 4a -- 4c -- 4e -- 50: -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- 60: 60 -- 62 -- 64 -- 66 -- 68 -- 6a -- 6c -- 6e -- 70: 70 -- 72 -- 74 -- 76 -- This happens because for an i2c_msg with a len of 0 the driver will mark the transmission of the message as a success once the START has been sent, without waiting for the devices on the bus to respond with an ACK/NAK. Since i2cdetect seems to run in a tight loop over all addresses the NAK is treated as part of the next test for the next address. Delete the fast path that marks a message as complete when idev->msg_len is zero after sending a START/RESTART since this isn't a valid scenario. CC: stable@vger.kernel.org Fixes: 64a6f1c4987e ("i2c: add support for microchip fpga i2c controllers") Signed-off-by: Conor Dooley Reviewed-by: Andi Shyti Link: https://lore.kernel.org/r/20241218-outbid-encounter-b2e78b1cc707@spud Signed-off-by: Andi Shyti --- drivers/i2c/busses/i2c-microchip-corei2c.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/i2c/busses/i2c-microchip-corei2c.c b/drivers/i2c/busses/i2c-microchip-corei2c.c index 6a124e903c66..5db73429125c 100644 --- a/drivers/i2c/busses/i2c-microchip-corei2c.c +++ b/drivers/i2c/busses/i2c-microchip-corei2c.c @@ -287,8 +287,6 @@ static irqreturn_t mchp_corei2c_handle_isr(struct mchp_corei2c_dev *idev) ctrl &= ~CTRL_STA; writeb(idev->addr, idev->base + CORE_I2C_DATA); writeb(ctrl, idev->base + CORE_I2C_CTRL); - if (idev->msg_len == 0) - finished = true; break; case STATUS_M_ARB_LOST: idev->msg_err = -EAGAIN; -- 2.50.1 From 75cd4005da5492129917a4a4ee45e81660556104 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 25 Dec 2024 19:06:40 +0800 Subject: [PATCH 05/16] ublk: detach gendisk from ublk device if add_disk() fails Inside ublk_abort_requests(), gendisk is grabbed for aborting all inflight requests. And ublk_abort_requests() is called when exiting the uring context or handling timeout. If add_disk() fails, the gendisk may have been freed when calling ublk_abort_requests(), so use-after-free can be caused when getting disk's reference in ublk_abort_requests(). Fixes the bug by detaching gendisk from ublk device if add_disk() fails. Fixes: bd23f6c2c2d0 ("ublk: quiesce request queue when aborting queue") Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20241225110640.351531-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index d4aed12dd436..934ab9332c80 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -1618,6 +1618,21 @@ static void ublk_unquiesce_dev(struct ublk_device *ub) blk_mq_kick_requeue_list(ub->ub_disk->queue); } +static struct gendisk *ublk_detach_disk(struct ublk_device *ub) +{ + struct gendisk *disk; + + /* Sync with ublk_abort_queue() by holding the lock */ + spin_lock(&ub->lock); + disk = ub->ub_disk; + ub->dev_info.state = UBLK_S_DEV_DEAD; + ub->dev_info.ublksrv_pid = -1; + ub->ub_disk = NULL; + spin_unlock(&ub->lock); + + return disk; +} + static void ublk_stop_dev(struct ublk_device *ub) { struct gendisk *disk; @@ -1631,14 +1646,7 @@ static void ublk_stop_dev(struct ublk_device *ub) ublk_unquiesce_dev(ub); } del_gendisk(ub->ub_disk); - - /* Sync with ublk_abort_queue() by holding the lock */ - spin_lock(&ub->lock); - disk = ub->ub_disk; - ub->dev_info.state = UBLK_S_DEV_DEAD; - ub->dev_info.ublksrv_pid = -1; - ub->ub_disk = NULL; - spin_unlock(&ub->lock); + disk = ublk_detach_disk(ub); put_disk(disk); unlock: mutex_unlock(&ub->mutex); @@ -2336,7 +2344,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd) out_put_cdev: if (ret) { - ub->dev_info.state = UBLK_S_DEV_DEAD; + ublk_detach_disk(ub); ublk_put_device(ub); } if (ret) -- 2.50.1 From e33ac68e5e21ec1292490dfe061e75c0dbdd3bd4 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 26 Dec 2024 16:49:23 +0000 Subject: [PATCH 06/16] io_uring/sqpoll: fix sqpoll error handling races BUG: KASAN: slab-use-after-free in __lock_acquire+0x370b/0x4a10 kernel/locking/lockdep.c:5089 Call Trace: ... _raw_spin_lock_irqsave+0x3d/0x60 kernel/locking/spinlock.c:162 class_raw_spinlock_irqsave_constructor include/linux/spinlock.h:551 [inline] try_to_wake_up+0xb5/0x23c0 kernel/sched/core.c:4205 io_sq_thread_park+0xac/0xe0 io_uring/sqpoll.c:55 io_sq_thread_finish+0x6b/0x310 io_uring/sqpoll.c:96 io_sq_offload_create+0x162/0x11d0 io_uring/sqpoll.c:497 io_uring_create io_uring/io_uring.c:3724 [inline] io_uring_setup+0x1728/0x3230 io_uring/io_uring.c:3806 ... Kun Hu reports that the SQPOLL creating error path has UAF, which happens if io_uring_alloc_task_context() fails and then io_sq_thread() manages to run and complete before the rest of error handling code, which means io_sq_thread_finish() is looking at already killed task. Note that this is mostly theoretical, requiring fault injection on the allocation side to trigger in practice. Cc: stable@vger.kernel.org Reported-by: Kun Hu Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/0f2f1aa5729332612bd01fe0f2f385fd1f06ce7c.1735231717.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- io_uring/sqpoll.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 6df5e649c413..9e5bd79fd2b5 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -405,6 +405,7 @@ void io_sqpoll_wait_sq(struct io_ring_ctx *ctx) __cold int io_sq_offload_create(struct io_ring_ctx *ctx, struct io_uring_params *p) { + struct task_struct *task_to_put = NULL; int ret; /* Retain compatibility with failing for an invalid attach attempt */ @@ -480,6 +481,7 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, } sqd->thread = tsk; + task_to_put = get_task_struct(tsk); ret = io_uring_alloc_task_context(tsk, ctx); wake_up_new_task(tsk); if (ret) @@ -490,11 +492,15 @@ __cold int io_sq_offload_create(struct io_ring_ctx *ctx, goto err; } + if (task_to_put) + put_task_struct(task_to_put); return 0; err_sqpoll: complete(&ctx->sq_data->exited); err: io_sq_thread_finish(ctx); + if (task_to_put) + put_task_struct(task_to_put); return ret; } -- 2.50.1 From 6cc45f8c1f898570916044f606be9890d295e129 Mon Sep 17 00:00:00 2001 From: Tomas Glozar Date: Wed, 27 Nov 2024 14:41:30 +0100 Subject: [PATCH 07/16] rtla/timerlat: Fix histogram ALL for zero samples rtla timerlat hist currently computers the minimum, maximum and average latency even in cases when there are zero samples. This leads to nonsensical values being calculated for maximum and minimum, and to divide by zero for average. A similar bug is fixed by 01b05fc0e5f3 ("rtla/timerlat: Fix histogram report when a cpu count is 0") but the bug still remains for printing the sum over all CPUs in timerlat_print_stats_all. The issue can be reproduced with this command: $ rtla timerlat hist -U -d 1s Index over: count: min: avg: max: Floating point exception (core dumped) (There are always no samples with -U unless the user workload is created.) Fix the bug by omitting max/min/avg when sample count is zero, displaying a dash instead, just like we already do for the individual CPUs. The logic is moved into a new function called format_summary_value, which is used for both the individual CPUs and for the overall summary. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20241127134130.51171-1-tglozar@redhat.com Fixes: 1462501c7a8 ("rtla/timerlat: Add a summary for hist mode") Signed-off-by: Tomas Glozar Signed-off-by: Steven Rostedt (Google) --- tools/tracing/rtla/src/timerlat_hist.c | 177 ++++++++++++++----------- 1 file changed, 96 insertions(+), 81 deletions(-) diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 8b66387e5f35..4403cc4eba30 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -281,6 +281,21 @@ static void timerlat_hist_header(struct osnoise_tool *tool) trace_seq_reset(s); } +/* + * format_summary_value - format a line of summary value (min, max or avg) + * of hist data + */ +static void format_summary_value(struct trace_seq *seq, + int count, + unsigned long long val, + bool avg) +{ + if (count) + trace_seq_printf(seq, "%9llu ", avg ? val / count : val); + else + trace_seq_printf(seq, "%9c ", '-'); +} + /* * timerlat_print_summary - print the summary of the hist data to the output */ @@ -328,29 +343,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].min_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].min_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].min_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].min_user, + false); } trace_seq_printf(trace->seq, "\n"); @@ -364,29 +373,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_irq / data->hist[cpu].irq_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].sum_irq, + true); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_thread / data->hist[cpu].thread_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].sum_thread, + true); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].sum_user / data->hist[cpu].user_count); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].sum_user, + true); } trace_seq_printf(trace->seq, "\n"); @@ -400,29 +403,23 @@ timerlat_print_summary(struct timerlat_hist_params *params, if (!data->hist[cpu].irq_count && !data->hist[cpu].thread_count) continue; - if (!params->no_irq) { - if (data->hist[cpu].irq_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_irq); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_irq) + format_summary_value(trace->seq, + data->hist[cpu].irq_count, + data->hist[cpu].max_irq, + false); - if (!params->no_thread) { - if (data->hist[cpu].thread_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_thread); - else - trace_seq_printf(trace->seq, " - "); - } + if (!params->no_thread) + format_summary_value(trace->seq, + data->hist[cpu].thread_count, + data->hist[cpu].max_thread, + false); - if (params->user_hist) { - if (data->hist[cpu].user_count) - trace_seq_printf(trace->seq, "%9llu ", - data->hist[cpu].max_user); - else - trace_seq_printf(trace->seq, " - "); - } + if (params->user_hist) + format_summary_value(trace->seq, + data->hist[cpu].user_count, + data->hist[cpu].max_user, + false); } trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); @@ -506,16 +503,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "min: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.min_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.min_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.min_user); + format_summary_value(trace->seq, + sum.user_count, + sum.min_user, + false); trace_seq_printf(trace->seq, "\n"); @@ -523,16 +526,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "avg: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_irq / sum.irq_count); + format_summary_value(trace->seq, + sum.irq_count, + sum.sum_irq, + true); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_thread / sum.thread_count); + format_summary_value(trace->seq, + sum.thread_count, + sum.sum_thread, + true); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.sum_user / sum.user_count); + format_summary_value(trace->seq, + sum.user_count, + sum.sum_user, + true); trace_seq_printf(trace->seq, "\n"); @@ -540,16 +549,22 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "max: "); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_irq); + format_summary_value(trace->seq, + sum.irq_count, + sum.max_irq, + false); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_thread); + format_summary_value(trace->seq, + sum.thread_count, + sum.max_thread, + false); if (params->user_hist) - trace_seq_printf(trace->seq, "%9llu ", - sum.max_user); + format_summary_value(trace->seq, + sum.user_count, + sum.max_user, + false); trace_seq_printf(trace->seq, "\n"); trace_seq_do_printf(trace->seq); -- 2.50.1 From 31ad36a271290648e7c2288a03d7b933d20254d6 Mon Sep 17 00:00:00 2001 From: chenchangcheng Date: Fri, 20 Dec 2024 15:48:47 +0800 Subject: [PATCH 08/16] objtool: Add bch2_trans_unlocked_error() to bcachefs noreturns Fix the following objtool warning during build time: fs/bcachefs/btree_trans_commit.o: warning: objtool: bch2_trans_commit_write_locked.isra.0() falls through to next function do_bch2_trans_commit.isra.0() fs/bcachefs/btree_trans_commit.o: warning: objtool: .text: unexpected end of section ...... fs/bcachefs/btree_update.o: warning: objtool: bch2_trans_update_get_key_cache() falls through to next function flush_new_cached_update() fs/bcachefs/btree_update.o: warning: objtool: flush_new_cached_update() falls through to next function bch2_trans_update_by_path() bch2_trans_unlocked_error() is an Obviously Correct (tm) panic() wrapper, add it to the list of known noreturns. [ mingo: Improved the changelog ] Fixes: fd104e2967b7 ("bcachefs: bch2_trans_verify_not_unlocked()") Signed-off-by: chenchangcheng Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20241220074847.3418134-1-ccc194101@163.com --- tools/objtool/noreturns.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index f37614cc2c1b..b2174894f9f7 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -19,6 +19,7 @@ NORETURN(__x64_sys_exit_group) NORETURN(arch_cpu_idle_dead) NORETURN(bch2_trans_in_restart_error) NORETURN(bch2_trans_restart_error) +NORETURN(bch2_trans_unlocked_error) NORETURN(cpu_bringup_and_idle) NORETURN(cpu_startup_entry) NORETURN(do_exit) -- 2.50.1 From f718faf3940e95d5d34af9041f279f598396ab7d Mon Sep 17 00:00:00 2001 From: Chen Ridong Date: Tue, 17 Dec 2024 00:48:18 +0000 Subject: [PATCH 09/16] freezer, sched: Report frozen tasks as 'D' instead of 'R' MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Before commit: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") the frozen task stat was reported as 'D' in cgroup v1. However, after rewriting the core freezer logic, the frozen task stat is reported as 'R'. This is confusing, especially when a task with stat of 'S' is frozen. This bug can be reproduced with these steps: $ cd /sys/fs/cgroup/freezer/ $ mkdir test $ sleep 1000 & [1] 739 // task whose stat is 'S' $ echo 739 > test/cgroup.procs $ echo FROZEN > test/freezer.state $ ps -aux | grep 739 root 739 0.1 0.0 8376 1812 pts/0 R 10:56 0:00 sleep 1000 As shown above, a task whose stat is 'S' was changed to 'R' when it was frozen. To solve this regression, simply maintain the same reported state as before the rewrite. [ mingo: Enhanced the changelog and comments ] Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic") Signed-off-by: Chen Ridong Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Acked-by: Tejun Heo Acked-by: Michal Koutný Link: https://lore.kernel.org/r/20241217004818.3200515-1-chenridong@huaweicloud.com --- include/linux/sched.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 66b311fbd5d6..64934e0830af 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1637,8 +1637,9 @@ static inline unsigned int __task_state_index(unsigned int tsk_state, * We're lying here, but rather than expose a completely new task state * to userspace, we can make this appear as if the task has gone through * a regular rt_mutex_lock() call. + * Report frozen tasks as uninterruptible. */ - if (tsk_state & TASK_RTLOCK_WAIT) + if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN)) state = TASK_UNINTERRUPTIBLE; return fls(state); -- 2.50.1 From dc81e556f2a017d681251ace21bf06c126d5a192 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Wed, 13 Nov 2024 09:59:34 -0800 Subject: [PATCH 10/16] x86/fred: Clear WFE in missing-ENDBRANCH #CPs An indirect branch instruction sets the CPU indirect branch tracker (IBT) into WAIT_FOR_ENDBRANCH (WFE) state and WFE stays asserted across the instruction boundary. When the decoder finds an inappropriate instruction while WFE is set ENDBR, the CPU raises a #CP fault. For the "kernel IBT no ENDBR" selftest where #CPs are deliberately triggered, the WFE state of the interrupted context needs to be cleared to let execution continue. Otherwise when the CPU resumes from the instruction that just caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU enters a dead loop. This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't set WFE. But FRED provides space on the entry stack (in an expanded CS area) to save and restore the WFE state, thus the WFE state is no longer clobbered, so software must clear it. Clear WFE to avoid dead looping in ibt_clear_fred_wfe() and the !ibt_fatal code path when execution is allowed to continue. Clobbering WFE in any other circumstance is a security-relevant bug. [ dhansen: changelog rewording ] Fixes: a5f6c2ace997 ("x86/shstk: Add user control-protection fault handler") Signed-off-by: Xin Li (Intel) Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20241113175934.3897541-1-xin%40zytor.com --- arch/x86/kernel/cet.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/x86/kernel/cet.c b/arch/x86/kernel/cet.c index d2c732a34e5d..303bf74d175b 100644 --- a/arch/x86/kernel/cet.c +++ b/arch/x86/kernel/cet.c @@ -81,6 +81,34 @@ static void do_user_cp_fault(struct pt_regs *regs, unsigned long error_code) static __ro_after_init bool ibt_fatal = true; +/* + * By definition, all missing-ENDBRANCH #CPs are a result of WFE && !ENDBR. + * + * For the kernel IBT no ENDBR selftest where #CPs are deliberately triggered, + * the WFE state of the interrupted context needs to be cleared to let execution + * continue. Otherwise when the CPU resumes from the instruction that just + * caused the previous #CP, another missing-ENDBRANCH #CP is raised and the CPU + * enters a dead loop. + * + * This is not a problem with IDT because it doesn't preserve WFE and IRET doesn't + * set WFE. But FRED provides space on the entry stack (in an expanded CS area) + * to save and restore the WFE state, thus the WFE state is no longer clobbered, + * so software must clear it. + */ +static void ibt_clear_fred_wfe(struct pt_regs *regs) +{ + /* + * No need to do any FRED checks. + * + * For IDT event delivery, the high-order 48 bits of CS are pushed + * as 0s into the stack, and later IRET ignores these bits. + * + * For FRED, a test to check if fred_cs.wfe is set would be dropped + * by compilers. + */ + regs->fred_cs.wfe = 0; +} + static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) { if ((error_code & CP_EC) != CP_ENDBR) { @@ -90,6 +118,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(regs->ip == (unsigned long)&ibt_selftest_noendbr)) { regs->ax = 0; + ibt_clear_fred_wfe(regs); return; } @@ -97,6 +126,7 @@ static void do_kernel_cp_fault(struct pt_regs *regs, unsigned long error_code) if (!ibt_fatal) { printk(KERN_DEFAULT CUT_HERE); __warn(__FILE__, __LINE__, (void *)regs->ip, TAINT_WARN, regs, NULL); + ibt_clear_fred_wfe(regs); return; } BUG(); -- 2.50.1 From 27834971f616c5e154423c578fa95e0444444ce1 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 19 Jun 2024 19:18:01 +0800 Subject: [PATCH 11/16] virt: tdx-guest: Just leak decrypted memory on unrecoverable errors In CoCo VMs it is possible for the untrusted host to cause set_memory_decrypted() to fail such that an error is returned and the resulting memory is shared. Callers need to take care to handle these errors to avoid returning decrypted (shared) memory to the page allocator, which could lead to functional or security issues. Leak the decrypted memory when set_memory_decrypted() fails, and don't need to print an error since set_memory_decrypted() will call WARN_ONCE(). Fixes: f4738f56d1dc ("virt: tdx-guest: Add Quote generation support using TSM_REPORTS") Signed-off-by: Li RongQing Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Reviewed-by: Rick Edgecombe Reviewed-by: Kirill A. Shutemov Cc: stable@vger.kernel.org Link: https://lore.kernel.org/all/20240619111801.25630-1-lirongqing%40baidu.com --- drivers/virt/coco/tdx-guest/tdx-guest.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index d7db6c824e13..224e7dde9cde 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -124,10 +124,8 @@ static void *alloc_quote_buf(void) if (!addr) return NULL; - if (set_memory_decrypted((unsigned long)addr, count)) { - free_pages_exact(addr, len); + if (set_memory_decrypted((unsigned long)addr, count)) return NULL; - } return addr; } -- 2.50.1 From fc033cf25e612e840e545f8d5ad2edd6ba613ed5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 29 Dec 2024 13:15:45 -0800 Subject: [PATCH 12/16] Linux 6.13-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c9b1d2d59b4..48e89108aa58 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.50.1 From a619cba8c69c434258ff4101d463322cd63e1bdc Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 3 Jan 2025 23:18:26 +0900 Subject: [PATCH 13/16] gpio: virtuser: fix missing lookup table cleanups When a virtuser device is created via configfs and the probe fails due to an incorrect lookup table, the table is not removed. This prevents subsequent probe attempts from succeeding, even if the issue is corrected, unless the device is released. Additionally, cleanup is also needed in the less likely case of platform_device_register_full() failure. Besides, a consistent memory leak in lookup_table->dev_id was spotted using kmemleak by toggling the live state between 0 and 1 with a correct lookup table. Introduce gpio_virtuser_remove_lookup_table() as the counterpart to the existing gpio_virtuser_make_lookup_table() and call it from all necessary points to ensure proper cleanup. Fixes: 91581c4b3f29 ("gpio: virtuser: new virtual testing driver for the GPIO API") Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250103141829.430662-2-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index 91b6352c957c..e89b1239b635 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -1439,6 +1439,15 @@ gpio_virtuser_make_lookup_table(struct gpio_virtuser_device *dev) return 0; } +static void +gpio_virtuser_remove_lookup_table(struct gpio_virtuser_device *dev) +{ + gpiod_remove_lookup_table(dev->lookup_table); + kfree(dev->lookup_table->dev_id); + kfree(dev->lookup_table); + dev->lookup_table = NULL; +} + static struct fwnode_handle * gpio_virtuser_make_device_swnode(struct gpio_virtuser_device *dev) { @@ -1487,10 +1496,8 @@ gpio_virtuser_device_activate(struct gpio_virtuser_device *dev) pdevinfo.fwnode = swnode; ret = gpio_virtuser_make_lookup_table(dev); - if (ret) { - fwnode_remove_software_node(swnode); - return ret; - } + if (ret) + goto err_remove_swnode; reinit_completion(&dev->probe_completion); dev->driver_bound = false; @@ -1498,23 +1505,31 @@ gpio_virtuser_device_activate(struct gpio_virtuser_device *dev) pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) { + ret = PTR_ERR(pdev); bus_unregister_notifier(&platform_bus_type, &dev->bus_notifier); - fwnode_remove_software_node(swnode); - return PTR_ERR(pdev); + goto err_remove_lookup_table; } wait_for_completion(&dev->probe_completion); bus_unregister_notifier(&platform_bus_type, &dev->bus_notifier); if (!dev->driver_bound) { - platform_device_unregister(pdev); - fwnode_remove_software_node(swnode); - return -ENXIO; + ret = -ENXIO; + goto err_unregister_pdev; } dev->pdev = pdev; return 0; + +err_unregister_pdev: + platform_device_unregister(pdev); +err_remove_lookup_table: + gpio_virtuser_remove_lookup_table(dev); +err_remove_swnode: + fwnode_remove_software_node(swnode); + + return ret; } static void @@ -1526,10 +1541,9 @@ gpio_virtuser_device_deactivate(struct gpio_virtuser_device *dev) swnode = dev_fwnode(&dev->pdev->dev); platform_device_unregister(dev->pdev); + gpio_virtuser_remove_lookup_table(dev); fwnode_remove_software_node(swnode); dev->pdev = NULL; - gpiod_remove_lookup_table(dev->lookup_table); - kfree(dev->lookup_table); } static ssize_t -- 2.50.1 From 656cc2e892f128b03ea9ef19bd11d70f71d5472b Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 3 Jan 2025 23:18:27 +0900 Subject: [PATCH 14/16] gpio: virtuser: fix handling of multiple conn_ids in lookup table Creating a virtuser device via configfs with multiple conn_ids fails due to incorrect indexing of lookup entries. Correct the indexing logic to ensure proper functionality when multiple gpio_virtuser_lookup are created. Fixes: 91581c4b3f29 ("gpio: virtuser: new virtual testing driver for the GPIO API") Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250103141829.430662-3-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index e89b1239b635..d6244f0d3bc7 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -1410,7 +1410,7 @@ gpio_virtuser_make_lookup_table(struct gpio_virtuser_device *dev) size_t num_entries = gpio_virtuser_get_lookup_count(dev); struct gpio_virtuser_lookup_entry *entry; struct gpio_virtuser_lookup *lookup; - unsigned int i = 0; + unsigned int i = 0, idx; lockdep_assert_held(&dev->lock); @@ -1424,12 +1424,12 @@ gpio_virtuser_make_lookup_table(struct gpio_virtuser_device *dev) return -ENOMEM; list_for_each_entry(lookup, &dev->lookup_list, siblings) { + idx = 0; list_for_each_entry(entry, &lookup->entry_list, siblings) { - table->table[i] = + table->table[i++] = GPIO_LOOKUP_IDX(entry->key, entry->offset < 0 ? U16_MAX : entry->offset, - lookup->con_id, i, entry->flags); - i++; + lookup->con_id, idx++, entry->flags); } } -- 2.50.1 From c7c434c1dba955005f5161dae73f09c0a922cfa7 Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 3 Jan 2025 23:18:28 +0900 Subject: [PATCH 15/16] gpio: virtuser: lock up configfs that an instantiated device depends on Once a virtuser device is instantiated and actively used, allowing rmdir for its configfs serves no purpose and can be confusing. Userspace interacts with the virtual consumer at arbitrary times, meaning it depends on its existence. Make the subsystem itself depend on the configfs entry for a virtuser device while it is in active use. Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250103141829.430662-4-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-virtuser.c | 47 ++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-virtuser.c b/drivers/gpio/gpio-virtuser.c index d6244f0d3bc7..e89f299f2140 100644 --- a/drivers/gpio/gpio-virtuser.c +++ b/drivers/gpio/gpio-virtuser.c @@ -1546,6 +1546,30 @@ gpio_virtuser_device_deactivate(struct gpio_virtuser_device *dev) dev->pdev = NULL; } +static void +gpio_virtuser_device_lockup_configfs(struct gpio_virtuser_device *dev, bool lock) +{ + struct configfs_subsystem *subsys = dev->group.cg_subsys; + struct gpio_virtuser_lookup_entry *entry; + struct gpio_virtuser_lookup *lookup; + + /* + * The device only needs to depend on leaf lookup entries. This is + * sufficient to lock up all the configfs entries that the + * instantiated, alive device depends on. + */ + list_for_each_entry(lookup, &dev->lookup_list, siblings) { + list_for_each_entry(entry, &lookup->entry_list, siblings) { + if (lock) + WARN_ON(configfs_depend_item_unlocked( + subsys, &entry->group.cg_item)); + else + configfs_undepend_item_unlocked( + &entry->group.cg_item); + } + } +} + static ssize_t gpio_virtuser_device_config_live_store(struct config_item *item, const char *page, size_t count) @@ -1558,15 +1582,24 @@ gpio_virtuser_device_config_live_store(struct config_item *item, if (ret) return ret; - guard(mutex)(&dev->lock); + if (live) + gpio_virtuser_device_lockup_configfs(dev, true); - if (live == gpio_virtuser_device_is_live(dev)) - return -EPERM; + scoped_guard(mutex, &dev->lock) { + if (live == gpio_virtuser_device_is_live(dev)) + ret = -EPERM; + else if (live) + ret = gpio_virtuser_device_activate(dev); + else + gpio_virtuser_device_deactivate(dev); + } - if (live) - ret = gpio_virtuser_device_activate(dev); - else - gpio_virtuser_device_deactivate(dev); + /* + * Undepend is required only if device disablement (live == 0) + * succeeds or if device enablement (live == 1) fails. + */ + if (live == !!ret) + gpio_virtuser_device_lockup_configfs(dev, false); return ret ?: count; } -- 2.50.1 From 8bd76b3d3f3af7ac2898b6a27ad90c444fec418f Mon Sep 17 00:00:00 2001 From: Koichiro Den Date: Fri, 3 Jan 2025 23:18:29 +0900 Subject: [PATCH 16/16] gpio: sim: lock up configfs that an instantiated device depends on Once a sim device is instantiated and actively used, allowing rmdir for its configfs serves no purpose and can be confusing. Effectively, arbitrary users start depending on its existence. Make the subsystem itself depend on the configfs entry for a sim device while it is in active use. Signed-off-by: Koichiro Den Link: https://lore.kernel.org/r/20250103141829.430662-5-koichiro.den@canonical.com Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-sim.c | 48 +++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index f387dad81f29..686ae3d11ba3 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -1027,6 +1027,30 @@ static void gpio_sim_device_deactivate(struct gpio_sim_device *dev) dev->pdev = NULL; } +static void +gpio_sim_device_lockup_configfs(struct gpio_sim_device *dev, bool lock) +{ + struct configfs_subsystem *subsys = dev->group.cg_subsys; + struct gpio_sim_bank *bank; + struct gpio_sim_line *line; + + /* + * The device only needs to depend on leaf line entries. This is + * sufficient to lock up all the configfs entries that the + * instantiated, alive device depends on. + */ + list_for_each_entry(bank, &dev->bank_list, siblings) { + list_for_each_entry(line, &bank->line_list, siblings) { + if (lock) + WARN_ON(configfs_depend_item_unlocked( + subsys, &line->group.cg_item)); + else + configfs_undepend_item_unlocked( + &line->group.cg_item); + } + } +} + static ssize_t gpio_sim_device_config_live_store(struct config_item *item, const char *page, size_t count) @@ -1039,14 +1063,24 @@ gpio_sim_device_config_live_store(struct config_item *item, if (ret) return ret; - guard(mutex)(&dev->lock); + if (live) + gpio_sim_device_lockup_configfs(dev, true); - if (live == gpio_sim_device_is_live(dev)) - ret = -EPERM; - else if (live) - ret = gpio_sim_device_activate(dev); - else - gpio_sim_device_deactivate(dev); + scoped_guard(mutex, &dev->lock) { + if (live == gpio_sim_device_is_live(dev)) + ret = -EPERM; + else if (live) + ret = gpio_sim_device_activate(dev); + else + gpio_sim_device_deactivate(dev); + } + + /* + * Undepend is required only if device disablement (live == 0) + * succeeds or if device enablement (live == 1) fails. + */ + if (live == !!ret) + gpio_sim_device_lockup_configfs(dev, false); return ret ?: count; } -- 2.50.1