From dc0a241ceaf3b7df6f1a7658b020c92682b75bfc Mon Sep 17 00:00:00 2001 From: Michael Jeanson Date: Wed, 19 Feb 2025 15:53:26 -0500 Subject: [PATCH 01/16] rseq: Fix rseq registration with CONFIG_DEBUG_RSEQ With CONFIG_DEBUG_RSEQ=y, at rseq registration the read-only fields are copied from user-space, if this copy fails the syscall returns -EFAULT and the registration should not be activated - but it erroneously is. Move the activation of the registration after the copy of the fields to fix this bug. Fixes: 7d5265ffcd8b ("rseq: Validate read-only fields under DEBUG_RSEQ config") Signed-off-by: Michael Jeanson Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Link: https://lore.kernel.org/r/20250219205330.324770-1-mjeanson@efficios.com --- kernel/rseq.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/rseq.c b/kernel/rseq.c index 442aba29bc4c..2cb16091ec0a 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -507,9 +507,6 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, return -EINVAL; if (!access_ok(rseq, rseq_len)) return -EFAULT; - current->rseq = rseq; - current->rseq_len = rseq_len; - current->rseq_sig = sig; #ifdef CONFIG_DEBUG_RSEQ /* * Initialize the in-kernel rseq fields copy for validation of @@ -521,6 +518,14 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid)) return -EFAULT; #endif + /* + * Activate the registration by setting the rseq area address, length + * and signature in the task struct. + */ + current->rseq = rseq; + current->rseq_len = rseq_len; + current->rseq_sig = sig; + /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields -- 2.51.0 From c9876cdb3ac4dcdf3c710ff02094165982e2a557 Mon Sep 17 00:00:00 2001 From: Brian Ochoa Date: Wed, 19 Feb 2025 10:09:20 -0500 Subject: [PATCH 02/16] docs: arch/x86/sva: Fix two grammar errors under Background and FAQ - Correct "in order" to "in order to" - Append missing quantifier Signed-off-by: Brian Ochoa Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250219150920.445802-1-brianeochoa@gmail.com --- Documentation/arch/x86/sva.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/x86/sva.rst b/Documentation/arch/x86/sva.rst index 33cb05005982..6a759984d471 100644 --- a/Documentation/arch/x86/sva.rst +++ b/Documentation/arch/x86/sva.rst @@ -25,7 +25,7 @@ to cache translations for virtual addresses. The IOMMU driver uses the mmu_notifier() support to keep the device TLB cache and the CPU cache in sync. When an ATS lookup fails for a virtual address, the device should use the PRI in order to request the virtual address to be paged into the -CPU page tables. The device must use ATS again in order the fetch the +CPU page tables. The device must use ATS again in order to fetch the translation before use. Shared Hardware Workqueues @@ -216,7 +216,7 @@ submitting work and processing completions. Single Root I/O Virtualization (SR-IOV) focuses on providing independent hardware interfaces for virtualizing hardware. Hence, it's required to be -almost fully functional interface to software supporting the traditional +an almost fully functional interface to software supporting the traditional BARs, space for interrupts via MSI-X, its own register layout. Virtual Functions (VFs) are assisted by the Physical Function (PF) driver. -- 2.51.0 From 38b14061947fa546491656e3f5e388d4fedf8dba Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:10 -0500 Subject: [PATCH 03/16] ftrace: Fix accounting of adding subops to a manager ops Function graph uses a subops and manager ops mechanism to attach to ftrace. The manager ops connects to ftrace and the functions it connects to is defined by a list of subops that it manages. The function hash that defines what the above ops attaches to limits the functions to attach if the hash has any content. If the hash is empty, it means to trace all functions. The creation of the manager ops hash is done by iterating over all the subops hashes. If any of the subops hashes is empty, it means that the manager ops hash must trace all functions as well. The issue is in the creation of the manager ops. When a second subops is attached, a new hash is created by starting it as NULL and adding the subops one at a time. But the NULL ops is mistaken as an empty hash, and once an empty hash is found, it stops the loop of subops and just enables all functions. # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions trace_initcall_start_cb (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 try_to_run_init_process (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 cleanup_rapl_pmus (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_free_pcibus_map (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_types_exit (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 kvm_shutdown (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 vmx_dump_msrs (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 vmx_cleanup_l1d_flush (1) tramp: 0xffffffffc0309000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 [..] Fix this by initializing the new hash to NULL and if the hash is NULL do not treat it as an empty hash but instead allocate by copying the content of the first sub ops. Then on subsequent iterations, the new hash will not be NULL, but the content of the previous subops. If that first subops attached to all functions, then new hash may assume that the manager ops also needs to attach to all functions. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Heiko Carstens Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.060300046@goodmis.org Fixes: 5fccc7552ccbc ("ftrace: Add subops logic to allow one ops to manage many") Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 728ecda6e8d4..bec54dc27204 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3220,15 +3220,22 @@ static struct ftrace_hash *copy_hash(struct ftrace_hash *src) * The filter_hash updates uses just the append_hash() function * and the notrace_hash does not. */ -static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash) +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash, + int size_bits) { struct ftrace_func_entry *entry; int size; int i; - /* An empty hash does everything */ - if (ftrace_hash_empty(*hash)) - return 0; + if (*hash) { + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + } else { + *hash = alloc_ftrace_hash(size_bits); + if (!*hash) + return -ENOMEM; + } /* If new_hash has everything make hash have everything */ if (ftrace_hash_empty(new_hash)) { @@ -3292,16 +3299,18 @@ static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_has /* Return a new hash that has a union of all @ops->filter_hash entries */ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) { - struct ftrace_hash *new_hash; + struct ftrace_hash *new_hash = NULL; struct ftrace_ops *subops; + int size_bits; int ret; - new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits); - if (!new_hash) - return NULL; + if (ops->func_hash->filter_hash) + size_bits = ops->func_hash->filter_hash->size_bits; + else + size_bits = FTRACE_HASH_DEFAULT_BITS; list_for_each_entry(subops, &ops->subop_list, list) { - ret = append_hash(&new_hash, subops->func_hash->filter_hash); + ret = append_hash(&new_hash, subops->func_hash->filter_hash, size_bits); if (ret < 0) { free_ftrace_hash(new_hash); return NULL; @@ -3310,7 +3319,8 @@ static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) if (ftrace_hash_empty(new_hash)) break; } - return new_hash; + /* Can't return NULL as that means this failed */ + return new_hash ? : EMPTY_HASH; } /* Make @ops trace evenything except what all its subops do not trace */ @@ -3505,7 +3515,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); if (!filter_hash) return -ENOMEM; - ret = append_hash(&filter_hash, subops->func_hash->filter_hash); + ret = append_hash(&filter_hash, subops->func_hash->filter_hash, + size_bits); if (ret < 0) { free_ftrace_hash(filter_hash); return ret; -- 2.51.0 From 8eb4b09e0bbd30981305643229fe7640ad41b667 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:11 -0500 Subject: [PATCH 04/16] ftrace: Do not add duplicate entries in subops manager ops Check if a function is already in the manager ops of a subops. A manager ops contains multiple subops, and if two or more subops are tracing the same function, the manager ops only needs a single entry in its hash. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.226762894@goodmis.org Fixes: 4f554e955614f ("ftrace: Add ftrace_set_filter_ips function") Tested-by: Heiko Carstens Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index bec54dc27204..6b0c25761ccb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5718,6 +5718,9 @@ __ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return -ENOENT; free_hash_entry(hash, entry); return 0; + } else if (__ftrace_lookup_ip(hash, ip) != NULL) { + /* Already exists */ + return 0; } entry = add_hash_entry(hash, ip); -- 2.51.0 From ded9140622358a154efb3a777025fa7f7ae2c2d9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:12 -0500 Subject: [PATCH 05/16] fprobe: Always unregister fgraph function from ops When the last fprobe is removed, it calls unregister_ftrace_graph() to remove the graph_ops from function graph. The issue is when it does so, it calls return before removing the function from its graph ops via ftrace_set_filter_ips(). This leaves the last function lingering in the fprobe's fgraph ops and if a probe is added it also enables that last function (even though the callback will just drop it, it does add unneeded overhead to make that call). # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # echo "f:myevent2 schedule_timeout" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 schedule_timeout (1) tramp: 0xffffffffc02f3000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # > /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions # echo "f:myevent3 kmem_cache_free" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kmem_cache_free (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 schedule_timeout (1) tramp: 0xffffffffc0219000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 The above enabled a fprobe on kernel_clone, and then on schedule_timeout. The content of the enabled_functions shows the functions that have a callback attached to them. The fprobe attached to those functions properly. Then the fprobes were cleared, and enabled_functions was empty after that. But after adding a fprobe on kmem_cache_free, the enabled_functions shows that the schedule_timeout was attached again. This is because it was still left in the fprobe ops that is used to tell function graph what functions it wants callbacks from. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.393254452@goodmis.org Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") Tested-by: Heiko Carstens Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fprobe.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 2560b312ad57..62e8f7d56602 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -403,11 +403,9 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) lockdep_assert_held(&fprobe_mutex); fprobe_graph_active--; - if (!fprobe_graph_active) { - /* Q: should we unregister it ? */ + /* Q: should we unregister it ? */ + if (!fprobe_graph_active) unregister_ftrace_graph(&fprobe_graph_ops); - return; - } ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } -- 2.51.0 From ca26554a1498bc905c4a39fb42d55d93f3ae8df2 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:13 -0500 Subject: [PATCH 06/16] fprobe: Fix accounting of when to unregister from function graph When adding a new fprobe, it will update the function hash to the functions the fprobe is attached to and register with function graph to have it call the registered functions. The fprobe_graph_active variable keeps track of the number of fprobes that are using function graph. If two fprobes attach to the same function, it increments the fprobe_graph_active for each of them. But when they are removed, the first fprobe to be removed will see that the function it is attached to is also used by another fprobe and it will not remove that function from function_graph. The logic will skip decrementing the fprobe_graph_active variable. This causes the fprobe_graph_active variable to not go to zero when all fprobes are removed, and in doing so it does not unregister from function graph. As the fgraph ops hash will now be empty, and an empty filter hash means all functions are enabled, this triggers function graph to add a callback to the fprobe infrastructure for every function! # echo "f:myevent1 kernel_clone" >> /sys/kernel/tracing/dynamic_events # echo "f:myevent2 kernel_clone%return" >> /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions kernel_clone (1) tramp: 0xffffffffc0024000 (ftrace_graph_func+0x0/0x60) ->ftrace_graph_func+0x0/0x60 # > /sys/kernel/tracing/dynamic_events # cat /sys/kernel/tracing/enabled_functions trace_initcall_start_cb (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 try_to_run_init_process (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 x86_pmu_show_pmu_cap (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 cleanup_rapl_pmus (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_free_pcibus_map (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_types_exit (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 uncore_pci_exit.part.0 (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 kvm_shutdown (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 vmx_dump_msrs (1) tramp: 0xffffffffc0026000 (function_trace_call+0x0/0x170) ->function_trace_call+0x0/0x170 [..] # cat /sys/kernel/tracing/enabled_functions | wc -l 54702 If a fprobe is being removed and all its functions are also traced by other fprobes, still decrement the fprobe_graph_active counter. Cc: stable@vger.kernel.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.565129766@goodmis.org Fixes: 4346ba1604093 ("fprobe: Rewrite fprobe on function-graph tracer") Closes: https://lore.kernel.org/all/20250217114918.10397-A-hca@linux.ibm.com/ Reported-by: Heiko Carstens Tested-by: Heiko Carstens Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/fprobe.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 62e8f7d56602..33082c4e8154 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -407,7 +407,8 @@ static void fprobe_graph_remove_ips(unsigned long *addrs, int num) if (!fprobe_graph_active) unregister_ftrace_graph(&fprobe_graph_ops); - ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); + if (num) + ftrace_set_filter_ips(&fprobe_graph_ops.ops, addrs, num, 1, 0); } static int symbols_cmp(const void *a, const void *b) @@ -677,8 +678,7 @@ int unregister_fprobe(struct fprobe *fp) } del_fprobe_hash(fp); - if (count) - fprobe_graph_remove_ips(addrs, count); + fprobe_graph_remove_ips(addrs, count); kfree_rcu(hlist_array, rcu); fp->hlist_array = NULL; -- 2.51.0 From e85c5e9792b942381ad92ccd0ff745b6d408a91f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 20 Feb 2025 15:20:14 -0500 Subject: [PATCH 07/16] selftests/ftrace: Update fprobe test to check enabled_functions file A few bugs were found in the fprobe accounting logic along with it using the function graph infrastructure. Update the fprobe selftest to catch those bugs in case they or something similar shows up in the future. The test now checks the enabled_functions file which shows all the functions attached to ftrace or fgraph. When enabling a fprobe, make sure that its corresponding function is also added to that file. Also add two more fprobes to enable to make sure that the fprobe logic works properly with multiple probes. Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Sven Schnelle Cc: Vasily Gorbik Cc: Alexander Gordeev Link: https://lore.kernel.org/20250220202055.733001756@goodmis.org Acked-by: Masami Hiramatsu (Google) Tested-by: Heiko Carstens Signed-off-by: Steven Rostedt (Google) --- .../test.d/dynevent/add_remove_fprobe.tc | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index dc25bcf4f9e2..449f9d8be746 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -7,12 +7,38 @@ echo 0 > events/enable echo > dynamic_events PLACE=$FUNCTION_FORK +PLACE2="kmem_cache_free" +PLACE3="schedule_timeout" echo "f:myevent1 $PLACE" >> dynamic_events + +# Make sure the event is attached and is the only one +grep -q $PLACE enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + echo "f:myevent2 $PLACE%return" >> dynamic_events +# It should till be the only attached function +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q $PLACE2 enabled_functions +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 2 ]; then + exit_fail +fi + grep -q myevent1 dynamic_events grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events test -d events/fprobes/myevent1 test -d events/fprobes/myevent2 @@ -21,6 +47,34 @@ echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events ! grep -q myevent2 dynamic_events +# should still have 2 left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 2 ]; then + exit_fail +fi + echo > dynamic_events +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 0 ]; then + exit_fail +fi + +echo "f:myevent4 $PLACE" >> dynamic_events + +# Should only have one enabled +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 1 ]; then + exit_fail +fi + +echo > dynamic_events + +# Should have none left +cnt=`cat enabled_functions | wc -l` +if [ $cnt -ne 0 ]; then + exit_fail +fi + clear_trace -- 2.51.0 From 57b76bedc5c52c66968183b5ef57234894c25ce7 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 20 Feb 2025 15:07:49 +0100 Subject: [PATCH 08/16] ftrace: Correct preemption accounting for function tracing. The function tracer should record the preemption level at the point when the function is invoked. If the tracing subsystem decrement the preemption counter it needs to correct this before feeding the data into the trace buffer. This was broken in the commit cited below while shifting the preempt-disabled section. Use tracing_gen_ctx_dec() which properly subtracts one from the preemption counter on a preemptible kernel. Cc: stable@vger.kernel.org Cc: Wander Lairson Costa Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Thomas Gleixner Link: https://lore.kernel.org/20250220140749.pfw8qoNZ@linutronix.de Fixes: ce5e48036c9e7 ("ftrace: disable preemption when recursion locked") Signed-off-by: Sebastian Andrzej Siewior Tested-by: Wander Lairson Costa Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_functions.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index d358c9935164..df56f9b76010 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -216,7 +216,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, parent_ip = function_get_true_parent_ip(parent_ip, fregs); - trace_ctx = tracing_gen_ctx(); + trace_ctx = tracing_gen_ctx_dec(); data = this_cpu_ptr(tr->array_buffer.data); if (!atomic_read(&data->disabled)) @@ -321,7 +321,6 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, struct trace_array *tr = op->private; struct trace_array_cpu *data; unsigned int trace_ctx; - unsigned long flags; int bit; if (unlikely(!tr->function_enabled)) @@ -347,8 +346,7 @@ function_no_repeats_trace_call(unsigned long ip, unsigned long parent_ip, if (is_repeat_check(tr, last_info, ip, parent_ip)) goto out; - local_save_flags(flags); - trace_ctx = tracing_gen_ctx_flags(flags); + trace_ctx = tracing_gen_ctx_dec(); process_repeats(tr, ip, parent_ip, last_info, trace_ctx); trace_function(tr, ip, parent_ip, trace_ctx); -- 2.51.0 From 2fa6a01345b538faa7b0fae8f723bb6977312428 Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Thu, 20 Feb 2025 11:15:28 +0800 Subject: [PATCH 09/16] tracing: Fix memory leak when reading set_event file kmemleak reports the following memory leak after reading set_event file: # cat /sys/kernel/tracing/set_event # cat /sys/kernel/debug/kmemleak unreferenced object 0xff110001234449e0 (size 16): comm "cat", pid 13645, jiffies 4294981880 hex dump (first 16 bytes): 01 00 00 00 00 00 00 00 a8 71 e7 84 ff ff ff ff .........q...... backtrace (crc c43abbc): __kmalloc_cache_noprof+0x3ca/0x4b0 s_start+0x72/0x2d0 seq_read_iter+0x265/0x1080 seq_read+0x2c9/0x420 vfs_read+0x166/0xc30 ksys_read+0xf4/0x1d0 do_syscall_64+0x79/0x150 entry_SYSCALL_64_after_hwframe+0x76/0x7e The issue can be reproduced regardless of whether set_event is empty or not. Here is an example about the valid content of set_event. # cat /sys/kernel/tracing/set_event sched:sched_process_fork sched:sched_switch sched:sched_wakeup *:*:mod:trace_events_sample The root cause is that s_next() returns NULL when nothing is found. This results in s_stop() attempting to free a NULL pointer because its parameter is NULL. Fix the issue by freeing the memory appropriately when s_next() fails to find anything. Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250220031528.7373-1-ahuang12@lenovo.com Fixes: b355247df104 ("tracing: Cache ":mod:" events for modules not loaded yet") Signed-off-by: Adrian Huang Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 4cb275316e51..513de9ceb80e 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1591,6 +1591,13 @@ s_next(struct seq_file *m, void *v, loff_t *pos) return iter; #endif + /* + * The iter is allocated in s_start() and passed via the 'v' + * parameter. To stop the iterator, NULL must be returned. But + * the return value is what the 'v' parameter in s_stop() receives + * and frees. Free iter here as it will no longer be used. + */ + kfree(iter); return NULL; } @@ -1667,9 +1674,9 @@ static int s_show(struct seq_file *m, void *v) } #endif -static void s_stop(struct seq_file *m, void *p) +static void s_stop(struct seq_file *m, void *v) { - kfree(p); + kfree(v); t_stop(m, NULL); } -- 2.51.0 From 781813db7909d945c33d3b035822225f3598774d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 20 Feb 2025 16:12:12 +0100 Subject: [PATCH 10/16] i2c: core: Allocate temporary client dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit drivers/i2c/i2c-core-base.c: In function ‘i2c_detect.isra’: drivers/i2c/i2c-core-base.c:2544:1: warning: the frame size of 1312 bytes is larger than 1024 bytes [-Wframe-larger-than=] 2544 | } | ^ Fix this by allocating the temporary client structure dynamically, as it is a rather large structure (1216 bytes, depending on kernel config). This is basically a revert of the to-be-fixed commit with some checkpatch improvements. Fixes: 735668f8e5c9 ("i2c: core: Allocate temp client on the stack in i2c_detect") Signed-off-by: Geert Uytterhoeven Reviewed-by: Su Hui Reviewed-by: Guenter Roeck [wsa: updated commit message, merged tags from similar patch] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 35a221e2c11c..7ad1ad5c8c3f 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2506,7 +2506,7 @@ static int i2c_detect_address(struct i2c_client *temp_client, static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) { const unsigned short *address_list; - struct i2c_client temp_client; + struct i2c_client *temp_client; int i, err = 0; address_list = driver->address_list; @@ -2527,19 +2527,24 @@ static int i2c_detect(struct i2c_adapter *adapter, struct i2c_driver *driver) return 0; /* Set up a temporary client to help detect callback */ - memset(&temp_client, 0, sizeof(temp_client)); - temp_client.adapter = adapter; + temp_client = kzalloc(sizeof(*temp_client), GFP_KERNEL); + if (!temp_client) + return -ENOMEM; + + temp_client->adapter = adapter; for (i = 0; address_list[i] != I2C_CLIENT_END; i += 1) { dev_dbg(&adapter->dev, "found normal entry for adapter %d, addr 0x%02x\n", i2c_adapter_id(adapter), address_list[i]); - temp_client.addr = address_list[i]; - err = i2c_detect_address(&temp_client, driver); + temp_client->addr = address_list[i]; + err = i2c_detect_address(temp_client, driver); if (unlikely(err)) break; } + kfree(temp_client); + return err; } -- 2.51.0 From d082ecbc71e9e0bf49883ee4afd435a77a5101b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Feb 2025 12:32:57 -0800 Subject: [PATCH 11/16] Linux 6.14-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 96407c1d6be1..30dab4c8b012 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 5797c04400ee117bfe459ff1e468d0ea38054ab4 Mon Sep 17 00:00:00 2001 From: Paul Fertser Date: Thu, 23 Jan 2025 15:20:02 +0300 Subject: [PATCH 12/16] hwmon: (peci/dimmtemp) Do not provide fake thresholds data When an Icelake or Sapphire Rapids CPU isn't providing the maximum and critical thresholds for particular DIMM the driver should return an error to the userspace instead of giving it stale (best case) or wrong (the structure contains all zeros after kzalloc() call) data. The issue can be reproduced by binding the peci driver while the host is fully booted and idle, this makes PECI interaction unreliable enough. Fixes: 73bc1b885dae ("hwmon: peci: Add dimmtemp driver") Fixes: 621995b6d795 ("hwmon: (peci/dimmtemp) Add Sapphire Rapids support") Cc: stable@vger.kernel.org Signed-off-by: Paul Fertser Reviewed-by: Iwona Winiarska Link: https://lore.kernel.org/r/20250123122003.6010-1-fercerpav@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/peci/dimmtemp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c index d6762259dd69..fbe82d9852e0 100644 --- a/drivers/hwmon/peci/dimmtemp.c +++ b/drivers/hwmon/peci/dimmtemp.c @@ -127,8 +127,6 @@ static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no) return 0; ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank, &data); - if (ret == -ENODATA) /* Use default or previous value */ - return 0; if (ret) return ret; @@ -509,11 +507,11 @@ read_thresholds_icx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd4, ®_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd0, ®_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 224e0: IMC 0 channel 0 -> rank 0 @@ -546,11 +544,11 @@ read_thresholds_spr(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd4, ®_val); if (ret || !(reg_val & BIT(31))) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; ret = peci_ep_pci_local_read(priv->peci_dev, 0, 30, 0, 2, 0xd0, ®_val); if (ret) - return -ENODATA; /* Use default or previous value */ + return -ENODATA; /* * Device 26, Offset 219a8: IMC 0 channel 0 -> rank 0 -- 2.51.0 From 6b6e2e8fd0de3fa7c6f4f8fe6841b01770b2e7bc Mon Sep 17 00:00:00 2001 From: Titus Rwantare Date: Thu, 27 Feb 2025 22:24:55 +0000 Subject: [PATCH 13/16] hwmon: (pmbus) Initialise page count in pmbus_identify() The `pmbus_identify()` function fails to correctly determine the number of supported pages on PMBus devices. This occurs because `info->pages` is implicitly zero-initialised, and `pmbus_set_page()` does not perform writes to the page register if `info->pages` is not yet initialised. Without this patch, `info->pages` is always set to the maximum after scanning. This patch initialises `info->pages` to `PMBUS_PAGES` before the probing loop, enabling `pmbus_set_page()` writes to make it out onto the bus correctly identifying the number of pages. `PMBUS_PAGES` seemed like a reasonable non-zero number because that's the current result of the identification process. Testing was done with a PMBus device in QEMU. Signed-off-by: Titus Rwantare Fixes: 442aba78728e7 ("hwmon: PMBus device driver") Link: https://lore.kernel.org/r/20250227222455.2583468-1-titusr@google.com Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/pmbus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hwmon/pmbus/pmbus.c b/drivers/hwmon/pmbus/pmbus.c index 77cf268e7d2d..920cd5408141 100644 --- a/drivers/hwmon/pmbus/pmbus.c +++ b/drivers/hwmon/pmbus/pmbus.c @@ -103,6 +103,8 @@ static int pmbus_identify(struct i2c_client *client, if (pmbus_check_byte_register(client, 0, PMBUS_PAGE)) { int page; + info->pages = PMBUS_PAGES; + for (page = 1; page < PMBUS_PAGES; page++) { if (pmbus_set_page(client, page, 0xff) < 0) break; -- 2.51.0 From 1c7932d5ae0f5c22fa52ac811b4c427bbca5aff5 Mon Sep 17 00:00:00 2001 From: Maud Spierings Date: Thu, 27 Feb 2025 13:57:53 +0100 Subject: [PATCH 14/16] hwmon: (ntc_thermistor) Fix the ncpXXxh103 sensor table I could not find a single table that has the values currently present in the table, change it to the actual values that can be found in [1]/[2] and [3] (page 15 column 2) [1]: https://www.murata.com/products/productdetail?partno=NCP15XH103F03RC [2]: https://www.murata.com/products/productdata/8796836626462/NTHCG83.txt?1437969843000 [3]: https://nl.mouser.com/datasheet/2/281/r44e-522712.pdf Fixes: 54ce3a0d8011 ("hwmon: (ntc_thermistor) Add support for ncpXXxh103") Signed-off-by: Maud Spierings Link: https://lore.kernel.org/r/20250227-ntc_thermistor_fixes-v1-3-70fa73200b52@gocontroll.com Reviewed-by: Linus Walleij Signed-off-by: Guenter Roeck --- drivers/hwmon/ntc_thermistor.c | 66 +++++++++++++++++----------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index b5352900463f..0d29c8f97ba7 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -181,40 +181,40 @@ static const struct ntc_compensation ncpXXwf104[] = { }; static const struct ntc_compensation ncpXXxh103[] = { - { .temp_c = -40, .ohm = 247565 }, - { .temp_c = -35, .ohm = 181742 }, - { .temp_c = -30, .ohm = 135128 }, - { .temp_c = -25, .ohm = 101678 }, - { .temp_c = -20, .ohm = 77373 }, - { .temp_c = -15, .ohm = 59504 }, - { .temp_c = -10, .ohm = 46222 }, - { .temp_c = -5, .ohm = 36244 }, - { .temp_c = 0, .ohm = 28674 }, - { .temp_c = 5, .ohm = 22878 }, - { .temp_c = 10, .ohm = 18399 }, - { .temp_c = 15, .ohm = 14910 }, - { .temp_c = 20, .ohm = 12169 }, + { .temp_c = -40, .ohm = 195652 }, + { .temp_c = -35, .ohm = 148171 }, + { .temp_c = -30, .ohm = 113347 }, + { .temp_c = -25, .ohm = 87559 }, + { .temp_c = -20, .ohm = 68237 }, + { .temp_c = -15, .ohm = 53650 }, + { .temp_c = -10, .ohm = 42506 }, + { .temp_c = -5, .ohm = 33892 }, + { .temp_c = 0, .ohm = 27219 }, + { .temp_c = 5, .ohm = 22021 }, + { .temp_c = 10, .ohm = 17926 }, + { .temp_c = 15, .ohm = 14674 }, + { .temp_c = 20, .ohm = 12081 }, { .temp_c = 25, .ohm = 10000 }, - { .temp_c = 30, .ohm = 8271 }, - { .temp_c = 35, .ohm = 6883 }, - { .temp_c = 40, .ohm = 5762 }, - { .temp_c = 45, .ohm = 4851 }, - { .temp_c = 50, .ohm = 4105 }, - { .temp_c = 55, .ohm = 3492 }, - { .temp_c = 60, .ohm = 2985 }, - { .temp_c = 65, .ohm = 2563 }, - { .temp_c = 70, .ohm = 2211 }, - { .temp_c = 75, .ohm = 1915 }, - { .temp_c = 80, .ohm = 1666 }, - { .temp_c = 85, .ohm = 1454 }, - { .temp_c = 90, .ohm = 1275 }, - { .temp_c = 95, .ohm = 1121 }, - { .temp_c = 100, .ohm = 990 }, - { .temp_c = 105, .ohm = 876 }, - { .temp_c = 110, .ohm = 779 }, - { .temp_c = 115, .ohm = 694 }, - { .temp_c = 120, .ohm = 620 }, - { .temp_c = 125, .ohm = 556 }, + { .temp_c = 30, .ohm = 8315 }, + { .temp_c = 35, .ohm = 6948 }, + { .temp_c = 40, .ohm = 5834 }, + { .temp_c = 45, .ohm = 4917 }, + { .temp_c = 50, .ohm = 4161 }, + { .temp_c = 55, .ohm = 3535 }, + { .temp_c = 60, .ohm = 3014 }, + { .temp_c = 65, .ohm = 2586 }, + { .temp_c = 70, .ohm = 2228 }, + { .temp_c = 75, .ohm = 1925 }, + { .temp_c = 80, .ohm = 1669 }, + { .temp_c = 85, .ohm = 1452 }, + { .temp_c = 90, .ohm = 1268 }, + { .temp_c = 95, .ohm = 1110 }, + { .temp_c = 100, .ohm = 974 }, + { .temp_c = 105, .ohm = 858 }, + { .temp_c = 110, .ohm = 758 }, + { .temp_c = 115, .ohm = 672 }, + { .temp_c = 120, .ohm = 596 }, + { .temp_c = 125, .ohm = 531 }, }; /* -- 2.51.0 From e278d5e8aef4c0a1d9a9fa8b8910d713a89aa800 Mon Sep 17 00:00:00 2001 From: Erik Schumacher Date: Mon, 24 Feb 2025 09:19:04 +0000 Subject: [PATCH 15/16] hwmon: (ad7314) Validate leading zero bits and return error Leading zero bits are sent on the bus before the temperature value is transmitted. If any of these bits are high, the connection might be unstable or there could be no AD7314 / ADT730x (or compatible) at all. Return -EIO in that case. Signed-off-by: Erik Schumacher Fixes: 4f3a659581cab ("hwmon: AD7314 driver (ported from IIO)") Link: https://lore.kernel.org/r/24a50c2981a318580aca8f50d23be7987b69ea00.camel@iris-sensing.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ad7314.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwmon/ad7314.c b/drivers/hwmon/ad7314.c index 7802bbf5f958..59424103f634 100644 --- a/drivers/hwmon/ad7314.c +++ b/drivers/hwmon/ad7314.c @@ -22,11 +22,13 @@ */ #define AD7314_TEMP_MASK 0x7FE0 #define AD7314_TEMP_SHIFT 5 +#define AD7314_LEADING_ZEROS_MASK BIT(15) /* * ADT7301 and ADT7302 temperature masks */ #define ADT7301_TEMP_MASK 0x3FFF +#define ADT7301_LEADING_ZEROS_MASK (BIT(15) | BIT(14)) enum ad7314_variant { adt7301, @@ -65,12 +67,20 @@ static ssize_t ad7314_temperature_show(struct device *dev, return ret; switch (spi_get_device_id(chip->spi_dev)->driver_data) { case ad7314: + if (ret & AD7314_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } data = (ret & AD7314_TEMP_MASK) >> AD7314_TEMP_SHIFT; data = sign_extend32(data, 9); return sprintf(buf, "%d\n", 250 * data); case adt7301: case adt7302: + if (ret & ADT7301_LEADING_ZEROS_MASK) { + /* Invalid read-out, leading zero part is missing */ + return -EIO; + } /* * Documented as a 13 bit twos complement register * with a sign bit - which is a 14 bit 2's complement -- 2.51.0 From 10fce7ebe888fa8c97eee7e317a47e7603e5e78d Mon Sep 17 00:00:00 2001 From: Xinghuo Chen Date: Mon, 3 Mar 2025 07:57:33 -0500 Subject: [PATCH 16/16] hwmon: fix a NULL vs IS_ERR_OR_NULL() check in xgene_hwmon_probe() The devm_memremap() function returns error pointers on error, it doesn't return NULL. Fixes: c7cefce03e69 ("hwmon: (xgene) access mailbox as RAM") Signed-off-by: Xinghuo Chen Link: https://lore.kernel.org/r/tencent_9AD8E7683EC29CAC97496B44F3F865BA070A@qq.com Signed-off-by: Guenter Roeck --- drivers/hwmon/xgene-hwmon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c index 1e3bd129a922..7087197383c9 100644 --- a/drivers/hwmon/xgene-hwmon.c +++ b/drivers/hwmon/xgene-hwmon.c @@ -706,7 +706,7 @@ static int xgene_hwmon_probe(struct platform_device *pdev) goto out; } - if (!ctx->pcc_comm_addr) { + if (IS_ERR_OR_NULL(ctx->pcc_comm_addr)) { dev_err(&pdev->dev, "Failed to ioremap PCC comm region\n"); rc = -ENOMEM; -- 2.51.0