From 22a18935d7d96bbb1a28076f843c1926d0ba189e Mon Sep 17 00:00:00 2001 From: John Edwards Date: Thu, 10 Oct 2024 23:09:23 +0000 Subject: [PATCH 01/16] Input: xpad - add support for MSI Claw A1M Add MSI Claw A1M controller to xpad_device match table when in xinput mode. Add MSI VID as XPAD_XBOX360_VENDOR. Signed-off-by: John Edwards Reviewed-by: Derek J. Clark Reviewed-by: Christopher Snowhill Link: https://lore.kernel.org/r/20241010232020.3292284-4-uejji@uejji.net Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/xpad.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 30b4cca8b69f..22ea58bf76cb 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -218,6 +218,7 @@ static const struct xpad_device { { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, @@ -493,6 +494,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ -- 2.51.0 From 2de01e0e57f3ebe7f90b08f6bca5ce0f3da3829f Mon Sep 17 00:00:00 2001 From: Nikita Travkin Date: Fri, 4 Oct 2024 
21:17:30 +0500 Subject: [PATCH 02/16] Input: zinitix - don't fail if linux,keycodes prop is absent When initially adding the touchkey support, a mistake was made in the property parsing code. The possible negative errno from device_property_count_u32() was never checked, which was an oversight left from converting to it from the of_property as part of the review fixes. Re-add the correct handling of the absent property, in which case zero touchkeys should be assumed, which would disable the feature. Reported-by: Jakob Hauser Tested-by: Jakob Hauser Fixes: 075d9b22c8fe ("Input: zinitix - add touchkey support") Reviewed-by: Linus Walleij Signed-off-by: Nikita Travkin Tested-by: Yassine Oudjana Link: https://lore.kernel.org/r/20241004-zinitix-no-keycodes-v2-1-876dc9fea4b6@trvn.ru Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/zinitix.c | 34 +++++++++++++++++++---------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index 52b3950460e2..716d6fa60f86 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -645,19 +645,29 @@ static int zinitix_ts_probe(struct i2c_client *client) return error; } - bt541->num_keycodes = device_property_count_u32(&client->dev, "linux,keycodes"); - if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) { - dev_err(&client->dev, "too many keys defined (%d)\n", bt541->num_keycodes); - return -EINVAL; - } + if (device_property_present(&client->dev, "linux,keycodes")) { + bt541->num_keycodes = device_property_count_u32(&client->dev, + "linux,keycodes"); + if (bt541->num_keycodes < 0) { + dev_err(&client->dev, "Failed to count keys (%d)\n", + bt541->num_keycodes); + return bt541->num_keycodes; + } else if (bt541->num_keycodes > ARRAY_SIZE(bt541->keycodes)) { + dev_err(&client->dev, "Too many keys defined (%d)\n", + bt541->num_keycodes); + return -EINVAL; + } - error = 
device_property_read_u32_array(&client->dev, "linux,keycodes", - bt541->keycodes, - bt541->num_keycodes); - if (error) { - dev_err(&client->dev, - "Unable to parse \"linux,keycodes\" property: %d\n", error); - return error; + error = device_property_read_u32_array(&client->dev, + "linux,keycodes", + bt541->keycodes, + bt541->num_keycodes); + if (error) { + dev_err(&client->dev, + "Unable to parse \"linux,keycodes\" property: %d\n", + error); + return error; + } } error = zinitix_init_input_dev(bt541); -- 2.51.0 From 2c02f7375e658ae93d57a31a66f91b62754ef8f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Oct 2024 21:43:00 -0400 Subject: [PATCH 03/16] fgraph: Use CPU hotplug mechanism to initialize idle shadow stacks The function graph infrastructure allocates a shadow stack for every task when enabled. This includes the idle tasks. The first time the function graph is invoked, the shadow stacks are created and never freed until the task exits. This includes the idle tasks. Only the idle tasks that were for online CPUs had their shadow stacks created when function graph tracing started. If function graph tracing is enabled and a CPU comes online, the idle task representing that CPU will not have its shadow stack created, and all function graph tracing for that idle task will be silently dropped. Instead, use the CPU hotplug mechanism to allocate the idle shadow stacks. This will include idle tasks for CPUs that come online during tracing. This issue can be reproduced by: # cd /sys/kernel/tracing # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 0 > set_ftrace_pid # echo function_graph > current_tracer # echo 1 > options/funcgraph-proc # echo 1 > /sys/devices/system/cpu/cpu1/online # grep '<idle>' per_cpu/cpu1/trace | head Before, nothing would show up.
After: 1) -0 | 0.811 us | __enqueue_entity(); 1) -0 | 5.626 us | } /* enqueue_entity */ 1) -0 | | dl_server_update_idle_time() { 1) -0 | | dl_scaled_delta_exec() { 1) -0 | 0.450 us | arch_scale_cpu_capacity(); 1) -0 | 1.242 us | } 1) -0 | 1.908 us | } 1) -0 | | dl_server_start() { 1) -0 | | enqueue_dl_entity() { 1) -0 | | task_contending() { Note, if tracing stops and restarts, the old way would then initialize the onlined CPUs. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Mark Rutland Cc: Thomas Gleixner Link: https://lore.kernel.org/20241018214300.6df82178@rorschach Fixes: 868baf07b1a25 ("ftrace: Fix memory leak with function graph and cpu hotplug") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d7d4fb403f6f..43f4e3f57438 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1160,19 +1160,13 @@ void fgraph_update_pid_func(void) static int start_graph_tracing(void) { unsigned long **ret_stack_list; - int ret, cpu; + int ret; ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; - /* The cpu_boot init_task->ret_stack will never be freed */ - for_each_online_cpu(cpu) { - if (!idle_task(cpu)->ret_stack) - ftrace_graph_init_idle_task(idle_task(cpu), cpu); - } - do { ret = alloc_retstack_tasklist(ret_stack_list); } while (ret == -EAGAIN); @@ -1242,14 +1236,34 @@ static void ftrace_graph_disable_direct(bool disable_branch) fgraph_direct_gops = &fgraph_stub; } +/* The cpu_boot init_task->ret_stack will never be freed */ +static int fgraph_cpu_init(unsigned int cpu) +{ + if (!idle_task(cpu)->ret_stack) + ftrace_graph_init_idle_task(idle_task(cpu), cpu); + return 0; +} + int register_ftrace_graph(struct fgraph_ops *gops) { + static bool fgraph_initialized; int command = 0; int ret = 0; int i = -1; mutex_lock(&ftrace_lock); + if 
(!fgraph_initialized) { + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "fgraph_idle_init", + fgraph_cpu_init, NULL); + if (ret < 0) { + pr_warn("fgraph: Error to init cpu hotplug support\n"); + return ret; + } + fgraph_initialized = true; + ret = 0; + } + if (!fgraph_array[0]) { /* The array must always have real data on it */ for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) -- 2.51.0 From fae4078c289a2f24229c0de652249948b1cd6bdb Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 18 Oct 2024 21:52:12 -0400 Subject: [PATCH 04/16] fgraph: Allocate ret_stack_list with proper size The ret_stack_list is an array of ret_stack shadow stacks for the function graph usage. When the first function graph is enabled, all tasks in the system get a shadow stack. The ret_stack_list is a 32 element array of pointers to these shadow stacks. It allocates the shadow stack in batches (32 stacks at a time), assigns them to running tasks, and continues until all tasks are covered. When the function graph shadow stack changed from an array of ftrace_ret_stack structures to an array of longs, the allocation of ret_stack_list went from allocating an array of 32 elements to just a block defined by SHADOW_STACK_SIZE. Luckily, that's defined as PAGE_SIZE and is much more than enough to hold 32 pointers. But it is way overkill for the amount needed to allocate. Change the allocation of ret_stack_list back to a kcalloc() of FTRACE_RETSTACK_ALLOC_SIZE pointers. 
Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20241018215212.23f13f40@rorschach Fixes: 42675b723b484 ("function_graph: Convert ret_stack to a series of longs") Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fgraph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 43f4e3f57438..41e7a15dcb50 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1162,7 +1162,8 @@ static int start_graph_tracing(void) unsigned long **ret_stack_list; int ret; - ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); + ret_stack_list = kcalloc(FTRACE_RETSTACK_ALLOC_SIZE, + sizeof(*ret_stack_list), GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; -- 2.51.0 From ae6a888a4357131c01d85f4c91fb32552dd0bf70 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 19 Oct 2024 09:16:51 -0600 Subject: [PATCH 05/16] io_uring/rw: fix wrong NOWAIT check in io_rw_init_file() A previous commit improved how !FMODE_NOWAIT is dealt with, but inadvertently negated a check whilst doing so. This caused -EAGAIN to be returned from reading files with O_NONBLOCK set. Fix up the check for REQ_F_SUPPORT_NOWAIT. Reported-by: Julian Orth Link: https://github.com/axboe/liburing/issues/1270 Fixes: f7c913438533 ("io_uring/rw: allow pollable non-blocking attempts for !FMODE_NOWAIT") Signed-off-by: Jens Axboe --- io_uring/rw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/io_uring/rw.c b/io_uring/rw.c index 80ae3c2ebb70..354c4e175654 100644 --- a/io_uring/rw.c +++ b/io_uring/rw.c @@ -807,7 +807,7 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type) * reliably. If not, or it IOCB_NOWAIT is set, don't retry. 
*/ if (kiocb->ki_flags & IOCB_NOWAIT || - ((file->f_flags & O_NONBLOCK && (req->flags & REQ_F_SUPPORT_NOWAIT)))) + ((file->f_flags & O_NONBLOCK && !(req->flags & REQ_F_SUPPORT_NOWAIT)))) req->flags |= REQ_F_NOWAIT; if (ctx->flags & IORING_SETUP_IOPOLL) { -- 2.51.0 From 42f7652d3eb527d03665b09edac47f85fb600924 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 20 Oct 2024 15:19:38 -0700 Subject: [PATCH 06/16] Linux 6.12-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 8cf3cf528892..a9a7d9ffaa98 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 8933805623fafc0e276ddd8110672068c5bd9763 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Oct 2024 13:19:48 -0600 Subject: [PATCH 07/16] block: move iostat check into blk_account_io_start() Rather than have blk_do_io_stat() check for both RQF_IO_STAT and whether the request is a passthrough request every time, move both of those checks into blk_account_io_start(). Then blk_do_io_stat() can be reduced to just checking for RQF_IO_STAT.
Reviewed-by: Keith Busch Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 40 +++++++++++++++++++++------------------- block/blk.h | 2 +- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index cf626e061dd7..6a339942948a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -359,8 +359,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, if (data->flags & BLK_MQ_REQ_PM) data->rq_flags |= RQF_PM; - if (blk_queue_io_stat(q)) - data->rq_flags |= RQF_IO_STAT; rq->rq_flags = data->rq_flags; if (data->rq_flags & RQF_SCHED_TAGS) { @@ -1000,24 +998,28 @@ static inline void blk_account_io_start(struct request *req) { trace_block_io_start(req); - if (blk_do_io_stat(req)) { - /* - * All non-passthrough requests are created from a bio with one - * exception: when a flush command that is part of a flush sequence - * generated by the state machine in blk-flush.c is cloned onto the - * lower device by dm-multipath we can get here without a bio. - */ - if (req->bio) - req->part = req->bio->bi_bdev; - else - req->part = req->q->disk->part0; + if (!blk_queue_io_stat(req->q)) + return; + if (blk_rq_is_passthrough(req)) + return; - part_stat_lock(); - update_io_ticks(req->part, jiffies, false); - part_stat_local_inc(req->part, - in_flight[op_is_write(req_op(req))]); - part_stat_unlock(); - } + req->rq_flags |= RQF_IO_STAT; + + /* + * All non-passthrough requests are created from a bio with one + * exception: when a flush command that is part of a flush sequence + * generated by the state machine in blk-flush.c is cloned onto the + * lower device by dm-multipath we can get here without a bio. 
+ */ + if (req->bio) + req->part = req->bio->bi_bdev; + else + req->part = req->q->disk->part0; + + part_stat_lock(); + update_io_ticks(req->part, jiffies, false); + part_stat_local_inc(req->part, in_flight[op_is_write(req_op(req))]); + part_stat_unlock(); } static inline void __blk_mq_end_request_acct(struct request *rq, u64 now) diff --git a/block/blk.h b/block/blk.h index c718e4291db0..84178e535533 100644 --- a/block/blk.h +++ b/block/blk.h @@ -413,7 +413,7 @@ int blk_dev_init(void); */ static inline bool blk_do_io_stat(struct request *rq) { - return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq); + return rq->rq_flags & RQF_IO_STAT; } void update_io_ticks(struct block_device *part, unsigned long now, bool end); -- 2.51.0 From 2c50ec98fc6cab28df35e0a22a2bcc7957d9d0ab Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Oct 2024 14:06:47 -0600 Subject: [PATCH 08/16] block: remove redundant passthrough check in blk_mq_need_time_stamp() Simply checking the rq_flags is enough to determine if accounting is being done for this request. Reviewed-by: Keith Busch Reviewed-by: Anuj Gupta Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4fecf46ef681..59e9adf815a4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -857,12 +857,6 @@ void blk_mq_end_request_batch(struct io_comp_batch *ib); */ static inline bool blk_mq_need_time_stamp(struct request *rq) { - /* - * passthrough io doesn't use iostat accounting, cgroup stats - * and io scheduler functionalities. 
- */ - if (blk_rq_is_passthrough(rq)) - return false; return (rq->rq_flags & (RQF_IO_STAT | RQF_STATS | RQF_USE_SCHED)); } -- 2.51.0 From fd0a63bcda40c09463f31b9401dbb0cb01c51674 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Oct 2024 07:26:12 -0600 Subject: [PATCH 09/16] block: remove 'req->part' check for stats accounting If RQF_IO_STAT is set, then accounting is enabled. There's no need to further gate this on req->part being set or not, RQF_IO_STAT should never be set if accounting is not being done for this request. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 6a339942948a..c7220ed0392e 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -92,7 +92,7 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv) { struct mq_inflight *mi = priv; - if (rq->part && blk_do_io_stat(rq) && + if (blk_do_io_stat(rq) && (!bdev_is_partition(mi->part) || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; @@ -762,7 +762,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); static void blk_account_io_completion(struct request *req, unsigned int bytes) { - if (req->part && blk_do_io_stat(req)) { + if (blk_do_io_stat(req)) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); @@ -980,8 +980,7 @@ static inline void blk_account_io_done(struct request *req, u64 now) * normal IO on queueing nor completion. Accounting the * containing request is enough. 
*/ - if (blk_do_io_stat(req) && req->part && - !(req->rq_flags & RQF_FLUSH_SEQ)) { + if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); -- 2.51.0 From e3569ecae44daa6d88ac1bb0c6b976c298eff966 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 3 Oct 2024 07:29:49 -0600 Subject: [PATCH 10/16] block: kill blk_do_io_stat() helper It's now just checking whether or not RQF_IO_STAT is set, so let's get rid of it and just open-code the specific flag that is being checked. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-merge.c | 13 ++++++------- block/blk-mq.c | 6 +++--- block/blk.h | 11 ----------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index ad763ec313b6..8b9a9646aed8 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -797,7 +797,7 @@ static inline void blk_update_mixed_merge(struct request *req, static void blk_account_io_merge_request(struct request *req) { - if (blk_do_io_stat(req)) { + if (req->rq_flags & RQF_IO_STAT) { part_stat_lock(); part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); part_stat_local_dec(req->part, @@ -1005,12 +1005,11 @@ enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) static void blk_account_io_merge_bio(struct request *req) { - if (!blk_do_io_stat(req)) - return; - - part_stat_lock(); - part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); - part_stat_unlock(); + if (req->rq_flags & RQF_IO_STAT) { + part_stat_lock(); + part_stat_inc(req->part, merges[op_stat_group(req_op(req))]); + part_stat_unlock(); + } } enum bio_merge_status bio_attempt_back_merge(struct request *req, diff --git a/block/blk-mq.c b/block/blk-mq.c index c7220ed0392e..b249514c2d63 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -92,7 +92,7 @@ static bool blk_mq_check_inflight(struct request *rq, void *priv) { struct mq_inflight *mi = priv; - if (blk_do_io_stat(rq) && + if 
(rq->rq_flags & RQF_IO_STAT && (!bdev_is_partition(mi->part) || rq->part == mi->part) && blk_mq_rq_state(rq) == MQ_RQ_IN_FLIGHT) mi->inflight[rq_data_dir(rq)]++; @@ -762,7 +762,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); static void blk_account_io_completion(struct request *req, unsigned int bytes) { - if (blk_do_io_stat(req)) { + if (req->rq_flags & RQF_IO_STAT) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); @@ -980,7 +980,7 @@ static inline void blk_account_io_done(struct request *req, u64 now) * normal IO on queueing nor completion. Accounting the * containing request is enough. */ - if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) { + if ((req->rq_flags & (RQF_IO_STAT|RQF_FLUSH_SEQ)) == RQF_IO_STAT) { const int sgrp = op_stat_group(req_op(req)); part_stat_lock(); diff --git a/block/blk.h b/block/blk.h index 84178e535533..ea926d685e92 100644 --- a/block/blk.h +++ b/block/blk.h @@ -405,17 +405,6 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi, struct queue_limits *lim); int blk_dev_init(void); -/* - * Contribute to IO statistics IFF: - * - * a) it's attached to a gendisk, and - * b) the queue had IO stats enabled when this request was started - */ -static inline bool blk_do_io_stat(struct request *rq) -{ - return rq->rq_flags & RQF_IO_STAT; -} - void update_io_ticks(struct block_device *part, unsigned long now, bool end); unsigned int part_in_flight(struct block_device *part); -- 2.51.0 From ba40f4c590f4c91119f3802ed501499709f583a9 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:41 +0200 Subject: [PATCH 11/16] block: add support for defining read-only partitions Add support for defining read-only partitions and complete support for it in the cmdline partition parser as the additional "ro" after a partition is scanned but never actually applied. 
Signed-off-by: Christian Marangi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241002221306.4403-2-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- block/blk.h | 1 + block/partitions/cmdline.c | 3 +++ block/partitions/core.c | 3 +++ 3 files changed, 7 insertions(+) diff --git a/block/blk.h b/block/blk.h index ea926d685e92..8fddaf6eae49 100644 --- a/block/blk.h +++ b/block/blk.h @@ -547,6 +547,7 @@ void blk_free_ext_minor(unsigned int minor); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 +#define ADDPART_FLAG_READONLY 4 int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); int bdev_del_partition(struct gendisk *disk, int partno); diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c index 152c85df92b2..da3e719d8e51 100644 --- a/block/partitions/cmdline.c +++ b/block/partitions/cmdline.c @@ -237,6 +237,9 @@ static int add_part(int slot, struct cmdline_subpart *subpart, put_partition(state, slot, subpart->from >> 9, subpart->size >> 9); + if (subpart->flags & PF_RDONLY) + state->parts[slot].flags |= ADDPART_FLAG_READONLY; + info = &state->parts[slot].info; strscpy(info->volname, subpart->name, sizeof(info->volname)); diff --git a/block/partitions/core.c b/block/partitions/core.c index 5bd7a603092e..629ed08b9ab9 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -373,6 +373,9 @@ static struct block_device *add_partition(struct gendisk *disk, int partno, goto out_del; } + if (flags & ADDPART_FLAG_READONLY) + bdev_set_flag(bdev, BD_READ_ONLY); + /* everything is up and running, commence */ err = xa_insert(&disk->part_tbl, partno, bdev, GFP_KERNEL); if (err) -- 2.51.0 From 592e4deeab50aa71a0b20820f2376c900e920211 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:42 +0200 Subject: [PATCH 12/16] docs: block: Document support for read-only partition in cmdline part Document support for read-only partition in cmdline 
partition for block devices by appending "ro" after the (partition name). Signed-off-by: Christian Marangi Link: https://lore.kernel.org/r/20241002221306.4403-3-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- Documentation/block/cmdline-partition.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Documentation/block/cmdline-partition.rst b/Documentation/block/cmdline-partition.rst index 530bedff548a..526ba201dddc 100644 --- a/Documentation/block/cmdline-partition.rst +++ b/Documentation/block/cmdline-partition.rst @@ -39,13 +39,16 @@ blkdevparts=[;] create a link to block device partition with the name "PARTNAME". User space application can access partition by partition name. +ro + read-only. Flag the partition as read-only. + Example: eMMC disk names are "mmcblk0" and "mmcblk0boot0". bootargs:: - 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)' + 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot)ro,-(kernel)' dmesg:: -- 2.51.0 From 9dfd9ea93aeab57d897bb7fc7c0707f26b0b9af8 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:43 +0200 Subject: [PATCH 13/16] block: introduce add_disk_fwnode() Introduce add_disk_fwnode() as a replacement of device_add_disk() that permits to pass and attach a fwnode to disk dev. This variant can be useful for eMMC that might have the partition table for the disk defined in DT. A parser can later make use of the attached fwnode to parse the related table and init the hardcoded partition for the disk. device_add_disk() is converted to a simple wrapper of add_disk_fwnode() with the fwnode entry set as NULL. 
Signed-off-by: Christian Marangi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241002221306.4403-4-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- block/genhd.c | 28 ++++++++++++++++++++++++---- include/linux/blkdev.h | 3 +++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 1c05dd4c6980..bc30eee7ab16 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -383,16 +383,18 @@ int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode) } /** - * device_add_disk - add disk information to kernel list + * add_disk_fwnode - add disk information to kernel list with fwnode * @parent: parent device for the disk * @disk: per-device partitioning information * @groups: Additional per-device sysfs groups + * @fwnode: attached disk fwnode * * This function registers the partitioning information in @disk - * with the kernel. + * with the kernel. Also attach a fwnode to the disk device. */ -int __must_check device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode) { struct device *ddev = disk_to_dev(disk); @@ -452,6 +454,8 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk, ddev->parent = parent; ddev->groups = groups; dev_set_name(ddev, "%s", disk->disk_name); + if (fwnode) + device_set_node(ddev, fwnode); if (!(disk->flags & GENHD_FL_HIDDEN)) ddev->devt = MKDEV(disk->major, disk->first_minor); ret = device_add(ddev); @@ -553,6 +557,22 @@ out_exit_elevator: elevator_exit(disk->queue); return ret; } +EXPORT_SYMBOL_GPL(add_disk_fwnode); + +/** + * device_add_disk - add disk information to kernel list + * @parent: parent device for the disk + * @disk: per-device partitioning information + * @groups: Additional per-device sysfs groups + * + * This function registers the partitioning information 
in @disk + * with the kernel. + */ +int __must_check device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups) +{ + return add_disk_fwnode(parent, disk, groups, NULL); +} EXPORT_SYMBOL(device_add_disk); static void blk_report_disk_dead(struct gendisk *disk, bool surprise) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50c3b959da28..a6aae750b4ac 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -725,6 +725,9 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) #define for_each_bio(_bio) \ for (; _bio; _bio = _bio->bi_next) +int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups, + struct fwnode_handle *fwnode); int __must_check device_add_disk(struct device *parent, struct gendisk *disk, const struct attribute_group **groups); static inline int __must_check add_disk(struct gendisk *disk) -- 2.51.0 From 3ec7cb11bb9e41e3018656eb7a34e0ce9507785e Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:44 +0200 Subject: [PATCH 14/16] mmc: block: attach partitions fwnode if found in mmc-card Attach partitions fwnode if found in mmc-card and register disk with it. This permits block partition to reference the node and register a partition table defined in DT for the special case of an embedded device that doesn't have a partition table flashed but has a hardcoded partition table passed from the system. JEDEC BOOT partition boot0/boot1 are supported but in DT we refer with the JEDEC name of boot1 and boot2 to better adhere to documentation. Also JEDEC GP partition gp0/1/2/3 are supported but in DT we refer with the JEDEC name of gp1/2/3/4 to better adhere to documentation.
Signed-off-by: Christian Marangi Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241002221306.4403-5-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- drivers/mmc/core/block.c | 55 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index ef06a4d5d65b..79f6fad97a80 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -2501,6 +2501,56 @@ static inline int mmc_blk_readonly(struct mmc_card *card) !(card->csd.cmdclass & CCC_BLOCK_WRITE); } +/* + * Search for a declared partitions node for the disk in mmc-card related node. + * + * This is to permit support for partition table defined in DT in special case + * where a partition table is not written in the disk and is expected to be + * passed from the running system. + * + * For the user disk, "partitions" node is searched. + * For the special HW disk, "partitions-" node with the appended name is used + * following this conversion table (to adhere to JEDEC naming) + * - boot0 -> partitions-boot1 + * - boot1 -> partitions-boot2 + * - gp0 -> partitions-gp1 + * - gp1 -> partitions-gp2 + * - gp2 -> partitions-gp3 + * - gp3 -> partitions-gp4 + */ +static struct fwnode_handle *mmc_blk_get_partitions_node(struct device *mmc_dev, + const char *subname) +{ + const char *node_name = "partitions"; + + if (subname) { + mmc_dev = mmc_dev->parent; + + /* + * Check if we are allocating a BOOT disk boot0/1 disk. + * In DT we use the JEDEC naming boot1/2. + */ + if (!strcmp(subname, "boot0")) + node_name = "partitions-boot1"; + if (!strcmp(subname, "boot1")) + node_name = "partitions-boot2"; + /* + * Check if we are allocating a GP disk gp0/1/2/3 disk. + * In DT we use the JEDEC naming gp1/2/3/4. 
+ */ + if (!strcmp(subname, "gp0")) + node_name = "partitions-gp1"; + if (!strcmp(subname, "gp1")) + node_name = "partitions-gp2"; + if (!strcmp(subname, "gp2")) + node_name = "partitions-gp3"; + if (!strcmp(subname, "gp3")) + node_name = "partitions-gp4"; + } + + return device_get_named_child_node(mmc_dev, node_name); +} + static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, struct device *parent, sector_t size, @@ -2509,6 +2559,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, int area_type, unsigned int part_type) { + struct fwnode_handle *disk_fwnode; struct mmc_blk_data *md; int devidx, ret; char cap_str[10]; @@ -2610,7 +2661,9 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, /* used in ->open, must be set before add_disk: */ if (area_type == MMC_BLK_DATA_AREA_MAIN) dev_set_drvdata(&card->dev, md); - ret = device_add_disk(md->parent, md->disk, mmc_disk_attr_groups); + disk_fwnode = mmc_blk_get_partitions_node(parent, subname); + ret = add_disk_fwnode(md->parent, md->disk, mmc_disk_attr_groups, + disk_fwnode); if (ret) goto err_put_disk; return md; -- 2.51.0 From 2e3a191e89f951b8cb3a7a0365d4d949cbf9599a Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:45 +0200 Subject: [PATCH 15/16] block: add support for partition table defined in OF Add support for partition table defined in Device Tree. Similar to how it's done with MTD, add support for defining a fixed partition table in device tree. A common scenario for this is fixed block (eMMC) embedded devices that have no MBR or GPT partition table to save storage space. The bootloader accesses the block device with absolute address of data. This is to complete the functionality with an equivalent implementation with providing partition table with bootargs, for the case where the bootargs can't be modified and tweaking the Device Tree is the only solution to have a usable partition table.
The implementation follows the fixed-partitions parser used on MTD devices where a "partitions" node is expected to be declared with "fixed-partitions" compatible in the OF node of the disk device (mmc-card for eMMC for example) and each child node declares a label and a reg with offset and size. If label is not declared, the node name is used as fallback. It is also possible to declare the read-only property to flag the partition as read-only. Signed-off-by: Christian Marangi Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20241002221306.4403-6-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- block/partitions/Kconfig | 9 ++++ block/partitions/Makefile | 1 + block/partitions/check.h | 1 + block/partitions/core.c | 3 ++ block/partitions/of.c | 110 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 124 insertions(+) create mode 100644 block/partitions/of.c diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig index 7aff4eb81c60..ce17e41451af 100644 --- a/block/partitions/Kconfig +++ b/block/partitions/Kconfig @@ -270,4 +270,13 @@ config CMDLINE_PARTITION Say Y here if you want to read the partition table from bootargs. The format for the command line is just like mtdparts. +config OF_PARTITION + bool "Device Tree partition support" if PARTITION_ADVANCED + depends on OF + help + Say Y here if you want to enable support for partition table + defined in Device Tree. (mainly for eMMC) + The format for the device tree node is just like MTD fixed-partition + schema.
+ endmenu diff --git a/block/partitions/Makefile b/block/partitions/Makefile index a7f05cdb02a8..25d424922c6e 100644 --- a/block/partitions/Makefile +++ b/block/partitions/Makefile @@ -12,6 +12,7 @@ obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o obj-$(CONFIG_MAC_PARTITION) += mac.o obj-$(CONFIG_LDM_PARTITION) += ldm.o obj-$(CONFIG_MSDOS_PARTITION) += msdos.o +obj-$(CONFIG_OF_PARTITION) += of.o obj-$(CONFIG_OSF_PARTITION) += osf.o obj-$(CONFIG_SGI_PARTITION) += sgi.o obj-$(CONFIG_SUN_PARTITION) += sun.o diff --git a/block/partitions/check.h b/block/partitions/check.h index 8d70a880c372..e5c1c61eb353 100644 --- a/block/partitions/check.h +++ b/block/partitions/check.h @@ -62,6 +62,7 @@ int karma_partition(struct parsed_partitions *state); int ldm_partition(struct parsed_partitions *state); int mac_partition(struct parsed_partitions *state); int msdos_partition(struct parsed_partitions *state); +int of_partition(struct parsed_partitions *state); int osf_partition(struct parsed_partitions *state); int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); diff --git a/block/partitions/core.c b/block/partitions/core.c index 629ed08b9ab9..cdad05f97647 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -43,6 +43,9 @@ static int (*const check_part[])(struct parsed_partitions *) = { #ifdef CONFIG_CMDLINE_PARTITION cmdline_partition, #endif +#ifdef CONFIG_OF_PARTITION + of_partition, /* cmdline have priority to OF */ +#endif #ifdef CONFIG_EFI_PARTITION efi_partition, /* this must come before msdos */ #endif diff --git a/block/partitions/of.c b/block/partitions/of.c new file mode 100644 index 000000000000..4e760fdffb3f --- /dev/null +++ b/block/partitions/of.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include +#include "check.h" + +static int validate_of_partition(struct device_node *np, int slot) +{ + u64 offset, size; + int len; + + const __be32 *reg = 
of_get_property(np, "reg", &len); + int a_cells = of_n_addr_cells(np); + int s_cells = of_n_size_cells(np); + + /* Make sure reg len match the expected addr and size cells */ + if (len / sizeof(*reg) != a_cells + s_cells) + return -EINVAL; + + /* Validate offset conversion from bytes to sectors */ + offset = of_read_number(reg, a_cells); + if (offset % SECTOR_SIZE) + return -EINVAL; + + /* Validate size conversion from bytes to sectors */ + size = of_read_number(reg + a_cells, s_cells); + if (!size || size % SECTOR_SIZE) + return -EINVAL; + + return 0; +} + +static void add_of_partition(struct parsed_partitions *state, int slot, + struct device_node *np) +{ + struct partition_meta_info *info; + char tmp[sizeof(info->volname) + 4]; + const char *partname; + int len; + + const __be32 *reg = of_get_property(np, "reg", &len); + int a_cells = of_n_addr_cells(np); + int s_cells = of_n_size_cells(np); + + /* Convert bytes to sector size */ + u64 offset = of_read_number(reg, a_cells) / SECTOR_SIZE; + u64 size = of_read_number(reg + a_cells, s_cells) / SECTOR_SIZE; + + put_partition(state, slot, offset, size); + + if (of_property_read_bool(np, "read-only")) + state->parts[slot].flags |= ADDPART_FLAG_READONLY; + + /* + * Follow MTD label logic, search for label property, + * fallback to node name if not found. 
+ */ + info = &state->parts[slot].info; + partname = of_get_property(np, "label", &len); + if (!partname) + partname = of_get_property(np, "name", &len); + strscpy(info->volname, partname, sizeof(info->volname)); + + snprintf(tmp, sizeof(tmp), "(%s)", info->volname); + strlcat(state->pp_buf, tmp, PAGE_SIZE); +} + +int of_partition(struct parsed_partitions *state) +{ + struct device *ddev = disk_to_dev(state->disk); + struct device_node *np; + int slot; + + struct device_node *partitions_np = of_node_get(ddev->of_node); + + if (!partitions_np || + !of_device_is_compatible(partitions_np, "fixed-partitions")) + return 0; + + slot = 1; + /* Validate partition offset and size */ + for_each_child_of_node(partitions_np, np) { + if (validate_of_partition(np, slot)) { + of_node_put(np); + of_node_put(partitions_np); + + return -1; + } + + slot++; + } + + slot = 1; + for_each_child_of_node(partitions_np, np) { + if (slot >= state->limit) { + of_node_put(np); + break; + } + + add_of_partition(state, slot, np); + + slot++; + } + + strlcat(state->pp_buf, "\n", PAGE_SIZE); + + return 1; +} -- 2.51.0 From f7a4b3438c6f5e95a6ae814b39bf6623a007dec8 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Thu, 3 Oct 2024 00:11:46 +0200 Subject: [PATCH 16/16] dt-bindings: mmc: Document support for partition table in mmc-card Document support for defining a partition table in the mmc-card node. This is needed if the eMMC doesn't have a partition table written and the bootloader of the device loads data by using absolute offsets of the block device. This is common on embedded devices that have eMMC installed to save space and have non-removable block devices. If an OF partition table is detected, any partition table written in the eMMC will be ignored and won't be parsed. eMMC provides a generic disk for user data and, if supported (JEDEC 4.4+), also provides two additional disks ("boot1" and "boot2") for special boot usage, where the bootloader or boot info is normally stored.
Newer JEDEC versions also support up to 4 GP partitions for other usage, called "gp1", "gp2", "gp3", "gp4". Signed-off-by: Christian Marangi Reviewed-by: Rob Herring (Arm) Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20241002221306.4403-7-ansuelsmth@gmail.com Signed-off-by: Jens Axboe --- .../devicetree/bindings/mmc/mmc-card.yaml | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/Documentation/devicetree/bindings/mmc/mmc-card.yaml b/Documentation/devicetree/bindings/mmc/mmc-card.yaml index fd347126449a..1d91d4272de0 100644 --- a/Documentation/devicetree/bindings/mmc/mmc-card.yaml +++ b/Documentation/devicetree/bindings/mmc/mmc-card.yaml @@ -13,6 +13,10 @@ description: | This documents describes the devicetree bindings for a mmc-host controller child node describing a mmc-card / an eMMC. + It's possible to define a fixed partition table for an eMMC for the user + partition, the 2 BOOT partitions (boot1/2) and the 4 GP partitions (gp1/2/3/4) if supported + by the eMMC. + properties: compatible: const: mmc-card @@ -26,6 +30,24 @@ properties: Use this to indicate that the mmc-card has a broken hpi implementation, and that hpi should not be used.
+patternProperties: + "^partitions(-boot[12]|-gp[1-4])?$": + $ref: /schemas/mtd/partitions/partitions.yaml + + patternProperties: + "^partition@[0-9a-f]+$": + $ref: /schemas/mtd/partitions/partition.yaml + + properties: + reg: + description: Must be a multiple of 512 as it's converted + internally from bytes to SECTOR_SIZE (512 bytes) + + required: + - reg + + unevaluatedProperties: false + required: - compatible - reg @@ -42,6 +64,36 @@ examples: compatible = "mmc-card"; reg = <0>; broken-hpi; + + partitions { + compatible = "fixed-partitions"; + + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "kernel"; /* Kernel */ + reg = <0x0 0x2000000>; /* 32 MB */ + }; + + partition@2000000 { + label = "rootfs"; + reg = <0x2000000 0x40000000>; /* 1GB */ + }; + }; + + partitions-boot1 { + compatible = "fixed-partitions"; + + #address-cells = <1>; + #size-cells = <1>; + + partition@0 { + label = "bl"; + reg = <0x0 0x2000000>; /* 32MB */ + read-only; + }; + }; }; }; -- 2.51.0