From a1a7eb89ca0b89dc1c326eeee2596f263291aca3 Mon Sep 17 00:00:00 2001 From: Nikolay Kuratov Date: Thu, 6 Feb 2025 12:01:56 +0300 Subject: [PATCH 01/16] ftrace: Avoid potential division by zero in function_stat_show() Check whether denominator expression x * (x - 1) * 1000 mod {2^32, 2^64} produce zero and skip stddev computation in that case. For now don't care about rec->counter * rec->counter overflow because rec->time * rec->time overflow will likely happen earlier. Cc: stable@vger.kernel.org Cc: Wen Yang Cc: Mark Rutland Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250206090156.1561783-1-kniv@yandex-team.ru Fixes: e31f7939c1c27 ("ftrace: Avoid potential division by zero in function profiler") Signed-off-by: Nikolay Kuratov Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ftrace.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6b0c25761ccb..fc88e0688daf 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -540,6 +540,7 @@ static int function_stat_show(struct seq_file *m, void *v) static struct trace_seq s; unsigned long long avg; unsigned long long stddev; + unsigned long long stddev_denom; #endif guard(mutex)(&ftrace_profile_lock); @@ -559,23 +560,19 @@ static int function_stat_show(struct seq_file *m, void *v) #ifdef CONFIG_FUNCTION_GRAPH_TRACER seq_puts(m, " "); - /* Sample standard deviation (s^2) */ - if (rec->counter <= 1) - stddev = 0; - else { - /* - * Apply Welford's method: - * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) - */ + /* + * Variance formula: + * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2) + * Maybe Welford's method is better here? + * Divide only by 1000 for ns^2 -> us^2 conversion. + * trace_print_graph_duration will divide by 1000 again. + */ + stddev = 0; + stddev_denom = rec->counter * (rec->counter - 1) * 1000; + if (stddev_denom) { stddev = rec->counter * rec->time_squared - rec->time * rec->time; - - /* - * Divide only 1000 for ns^2 -> us^2 conversion. - * trace_print_graph_duration will divide 1000 again. - */ - stddev = div64_ul(stddev, - rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, stddev_denom); } trace_seq_init(&s); -- 2.51.0 From c5b0320bbf79548fbf058a3925a07c8f281beeab Mon Sep 17 00:00:00 2001 From: Alejandro Jimenez Date: Mon, 6 Jan 2025 19:14:13 +0000 Subject: [PATCH 02/16] iommu/amd: Preserve default DTE fields when updating Host Page Table Root When updating the page table root field on the DTE, avoid overwriting any bits that are already set. The earlier call to make_clear_dte() writes default values that all DTEs must have set (currently DTE[V]), and those must be preserved. Currently this doesn't cause problems since the page table root update is the first field that is set after make_clear_dte() is called, and DTE_FLAG_V is set again later along with the permission bits (IR/IW). Remove this redundant assignment too. Fixes: fd5dff9de4be ("iommu/amd: Modify set_dte_entry() to use 256-bit DTE helpers") Signed-off-by: Alejandro Jimenez Reviewed-by: Dheeraj Kumar Srivastava Reviewed-by: Vasant Hegde Link: https://lore.kernel.org/r/20250106191413.3107140-1-alejandro.j.jimenez@oracle.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b48a72bd7b23..cd5116d8c3b2 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2043,12 +2043,12 @@ static void set_dte_entry(struct amd_iommu *iommu, make_clear_dte(dev_data, dte, &new); if (domain->iop.mode != PAGE_MODE_NONE) - new.data[0] = iommu_virt_to_phys(domain->iop.root); + new.data[0] |= iommu_virt_to_phys(domain->iop.root); new.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; - new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V; + new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW; /* * When SNP is enabled, we can only support TV=1 with non-zero domain ID. -- 2.51.0 From 64f792981e35e191eb619f6f2fefab76cc7d6112 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Fri, 28 Feb 2025 18:27:25 +0800 Subject: [PATCH 03/16] iommu/vt-d: Remove device comparison in context_setup_pass_through_cb Remove the device comparison check in context_setup_pass_through_cb. pci_for_each_dma_alias already makes a decision on whether the callback function should be called for a device. With the check in place it will fail to create context entries for aliases as it walks up to the root bus. Fixes: 2031c469f816 ("iommu/vt-d: Add support for static identity domain") Closes: https://lore.kernel.org/linux-iommu/82499eb6-00b7-4f83-879a-e97b4144f576@linux.intel.com/ Cc: stable@vger.kernel.org Signed-off-by: Jerry Snitselaar Link: https://lore.kernel.org/r/20250224180316.140123-1-jsnitsel@redhat.com Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel/iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index cc46098f875b..4d8d4593c9c8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4378,9 +4378,6 @@ static int context_setup_pass_through_cb(struct pci_dev *pdev, u16 alias, void * { struct device *dev = data; - if (dev != &pdev->dev) - return 0; - return context_setup_pass_through(dev, PCI_BUS_NUM(alias), alias & 0xff); } -- 2.51.0 From b150654f74bf0df8e6a7936d5ec51400d9ec06d8 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 28 Feb 2025 18:27:26 +0800 Subject: [PATCH 04/16] iommu/vt-d: Fix suspicious RCU usage Commit ("iommu/vt-d: Allocate DMAR fault interrupts locally") moved the call to enable_drhd_fault_handling() to a code path that does not hold any lock while traversing the drhd list. Fix it by ensuring the dmar_global_lock lock is held when traversing the drhd list. Without this fix, the following warning is triggered: ============================= WARNING: suspicious RCU usage 6.14.0-rc3 #55 Not tainted ----------------------------- drivers/iommu/intel/dmar.c:2046 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 2 locks held by cpuhp/1/23: #0: ffffffff84a67c50 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0 #1: ffffffff84a6a380 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x87/0x2c0 stack backtrace: CPU: 1 UID: 0 PID: 23 Comm: cpuhp/1 Not tainted 6.14.0-rc3 #55 Call Trace: dump_stack_lvl+0xb7/0xd0 lockdep_rcu_suspicious+0x159/0x1f0 ? __pfx_enable_drhd_fault_handling+0x10/0x10 enable_drhd_fault_handling+0x151/0x180 cpuhp_invoke_callback+0x1df/0x990 cpuhp_thread_fun+0x1ea/0x2c0 smpboot_thread_fn+0x1f5/0x2e0 ? __pfx_smpboot_thread_fn+0x10/0x10 kthread+0x12a/0x2d0 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x4a/0x60 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 Holding the lock in enable_drhd_fault_handling() triggers a lockdep splat about a possible deadlock between dmar_global_lock and cpu_hotplug_lock. This is avoided by not holding dmar_global_lock when calling iommu_device_register(), which initiates the device probe process. Fixes: d74169ceb0d2 ("iommu/vt-d: Allocate DMAR fault interrupts locally") Reported-and-tested-by: Ido Schimmel Closes: https://lore.kernel.org/linux-iommu/Zx9OwdLIc_VoQ0-a@shredder.mtl.com/ Tested-by: Breno Leitao Cc: stable@vger.kernel.org Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20250218022422.2315082-1-baolu.lu@linux.intel.com Tested-by: Ido Schimmel Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 1 + drivers/iommu/intel/iommu.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9f424acf474e..e540092d664d 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2043,6 +2043,7 @@ int enable_drhd_fault_handling(unsigned int cpu) /* * Enable fault control interrupt. */ + guard(rwsem_read)(&dmar_global_lock); for_each_iommu(iommu, drhd) { u32 fault_status; int ret; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 4d8d4593c9c8..bf1f0c814348 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3146,7 +3146,14 @@ int __init intel_iommu_init(void) iommu_device_sysfs_add(&iommu->iommu, NULL, intel_iommu_groups, "%s", iommu->name); + /* + * The iommu device probe is protected by the iommu_probe_device_lock. + * Release the dmar_global_lock before entering the device probe path + * to avoid unnecessary lock order splat. + */ + up_read(&dmar_global_lock); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); + down_read(&dmar_global_lock); iommu_pmu_register(iommu); } -- 2.51.0 From b654f7a51ffb386131de42aa98ed831f8c126546 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 28 Feb 2025 21:26:56 +0800 Subject: [PATCH 05/16] block: fix 'kmem_cache of name 'bio-108' already exists' Device mapper bioset often has big bio_slab size, which can be more than 1000, then 8byte can't hold the slab name any more, cause the kmem_cache allocation warning of 'kmem_cache of name 'bio-108' already exists'. Fix the warning by extending bio_slab->name to 12 bytes, but fix output of /proc/slabinfo Reported-by: Guangwu Zhang Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250228132656.2838008-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index f0c416e5931d..6ac5983ba51e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -77,7 +77,7 @@ struct bio_slab { struct kmem_cache *slab; unsigned int slab_ref; unsigned int slab_size; - char name[8]; + char name[12]; }; static DEFINE_MUTEX(bio_slab_lock); static DEFINE_XARRAY(bio_slabs); -- 2.51.0 From 64407f4b5807dc9dec8135e1bfd45d2cb11b4ea0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Feb 2025 16:03:47 +0300 Subject: [PATCH 06/16] gpiolib: Fix Oops in gpiod_direction_input_nonotify() The gpiod_direction_input_nonotify() function is supposed to return zero if the direction for the pin is input. But instead it accidentally returns GPIO_LINE_DIRECTION_IN (1) which will be cast into an ERR_PTR() in gpiochip_request_own_desc(). The callers dereference it and it leads to a crash. I changed gpiod_direction_output_raw_commit() just for consistency but returning GPIO_LINE_DIRECTION_OUT (0) is fine. Cc: stable@vger.kernel.org Fixes: 9d846b1aebbe ("gpiolib: check the return value of gpio_chip::get_direction()") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/254f3925-3015-4c9d-aac5-bb9b4b2cd2c5@stanley.mountain [Bartosz: moved the variable declarations to the top of the functions] Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index fc19df5a64c2..8741600af7ef 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2712,7 +2712,7 @@ EXPORT_SYMBOL_GPL(gpiod_direction_input); int gpiod_direction_input_nonotify(struct gpio_desc *desc) { - int ret = 0; + int ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2740,12 +2740,12 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); } else if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_IN) { + if (dir != GPIO_LINE_DIRECTION_IN) { gpiod_warn(desc, "%s: missing direction_input() operation and line is output\n", __func__); @@ -2764,7 +2764,7 @@ int gpiod_direction_input_nonotify(struct gpio_desc *desc) static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { - int val = !!value, ret = 0; + int val = !!value, ret = 0, dir; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) @@ -2788,12 +2788,12 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) } else { /* Check that we are in output mode if we can */ if (guard.gc->get_direction) { - ret = guard.gc->get_direction(guard.gc, + dir = guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)); - if (ret < 0) - return ret; + if (dir < 0) + return dir; - if (ret != GPIO_LINE_DIRECTION_OUT) { + if (dir != GPIO_LINE_DIRECTION_OUT) { gpiod_warn(desc, "%s: missing direction_output() operation\n", __func__); -- 2.51.0 From c157d351460bcf202970e97e611cb6b54a3dd4a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 25 Feb 2025 23:37:08 +0100 Subject: [PATCH 07/16] intel_idle: Handle older CPUs, which stop the TSC in deeper C states, correctly The Intel idle driver is preferred over the ACPI processor idle driver, but fails to implement the work around for Core2 generation CPUs, where the TSC stops in C2 and deeper C-states. This causes stalls and boot delays, when the clocksource watchdog does not catch the unstable TSC before the CPU goes deep idle for the first time. The ACPI driver marks the TSC unstable when it detects that the CPU supports C2 or deeper and the CPU does not have a non-stop TSC. Add the equivivalent work around to the Intel idle driver to cure that. Fixes: 18734958e9bf ("intel_idle: Use ACPI _CST for processor models without C-state tables") Reported-by: Fab Stz Signed-off-by: Thomas Gleixner Tested-by: Fab Stz Cc: All applicable Closes: https://lore.kernel.org/all/10cf96aa-1276-4bd4-8966-c890377030c3@yahoo.fr Link: https://patch.msgid.link/87bjupfy7f.ffs@tglx Signed-off-by: Rafael J. Wysocki --- drivers/idle/intel_idle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 118fe1d37c22..0fdb1d1316c4 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #define INTEL_IDLE_VERSION "0.5.1" @@ -1799,6 +1800,9 @@ static void __init intel_idle_init_cstates_acpi(struct cpuidle_driver *drv) if (intel_idle_state_needs_timer_stop(state)) state->flags |= CPUIDLE_FLAG_TIMER_STOP; + if (cx->type > ACPI_STATE_C1 && !boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) + mark_tsc_unstable("TSC halts in idle"); + state->enter = intel_idle; state->enter_s2idle = intel_idle_s2idle; } -- 2.51.0 From cb380909ae3b1ebf14d6a455a4f92d7916d790cb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:30 -0800 Subject: [PATCH 08/16] vhost: return task creation error instead of NULL Lets callers distinguish why the vhost task creation failed. No one currently cares why it failed, so no real runtime change from this patch, but that will not be the case for long. Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-2-kbusch@meta.com> Reviewed-by: Mike Christie Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 2 +- drivers/vhost/vhost.c | 2 +- kernel/vhost_task.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index d4ac4a1f8b81..18ca1ea6dc24 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7471,7 +7471,7 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm_nx_huge_page_recovery_worker_kill, kvm, "kvm-nx-lpage-recovery"); - if (!nx_thread) + if (IS_ERR(nx_thread)) return; vhost_task_start(nx_thread); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9ac25d08f473..63612faeab72 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -666,7 +666,7 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) vtsk = vhost_task_create(vhost_run_work_list, vhost_worker_killed, worker, name); - if (!vtsk) + if (IS_ERR(vtsk)) goto free_worker; mutex_init(&worker->mutex); diff --git a/kernel/vhost_task.c b/kernel/vhost_task.c index 8800f5acc007..2ef2e1b80091 100644 --- a/kernel/vhost_task.c +++ b/kernel/vhost_task.c @@ -133,7 +133,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), vtsk = kzalloc(sizeof(*vtsk), GFP_KERNEL); if (!vtsk) - return NULL; + return ERR_PTR(-ENOMEM); init_completion(&vtsk->exited); mutex_init(&vtsk->exit_mutex); vtsk->data = arg; @@ -145,7 +145,7 @@ struct vhost_task *vhost_task_create(bool (*fn)(void *), tsk = copy_process(NULL, 0, NUMA_NO_NODE, &args); if (IS_ERR(tsk)) { kfree(vtsk); - return NULL; + return ERR_PTR(PTR_ERR(tsk)); } vtsk->task = tsk; -- 2.51.0 From 916b7f42b3b3b539a71c204a9b49fdc4ca92cd82 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 27 Feb 2025 15:06:31 -0800 Subject: [PATCH 09/16] kvm: retry nx_huge_page_recovery_thread creation A VMM may send a non-fatal signal to its threads, including vCPU tasks, at any time, and thus may signal vCPU tasks during KVM_RUN. If a vCPU task receives the signal while its trying to spawn the huge page recovery vhost task, then KVM_RUN will fail due to copy_process() returning -ERESTARTNOINTR. Rework call_once() to mark the call complete if and only if the called function succeeds, and plumb the function's true error code back to the call_once() invoker. This provides userspace with the correct, non-fatal error code so that the VMM doesn't terminate the VM on -ENOMEM, and allows subsequent KVM_RUN a succeed by virtue of retrying creation of the NX huge page task. Co-developed-by: Sean Christopherson Signed-off-by: Sean Christopherson [implemented the kvm user side] Signed-off-by: Keith Busch Message-ID: <20250227230631.303431-3-kbusch@meta.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/mmu.c | 10 ++++----- include/linux/call_once.h | 47 ++++++++++++++++++++++++++++----------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 18ca1ea6dc24..8160870398b9 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -7460,7 +7460,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data) return true; } -static void kvm_mmu_start_lpage_recovery(struct once *once) +static int kvm_mmu_start_lpage_recovery(struct once *once) { struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once); struct kvm *kvm = container_of(ka, struct kvm, arch); @@ -7472,12 +7472,13 @@ static void kvm_mmu_start_lpage_recovery(struct once *once) kvm, "kvm-nx-lpage-recovery"); if (IS_ERR(nx_thread)) - return; + return PTR_ERR(nx_thread); vhost_task_start(nx_thread); /* Make the task visible only once it is fully started. */ WRITE_ONCE(kvm->arch.nx_huge_page_recovery_thread, nx_thread); + return 0; } int kvm_mmu_post_init_vm(struct kvm *kvm) @@ -7485,10 +7486,7 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) if (nx_hugepage_mitigation_hard_disabled) return 0; - call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); - if (!kvm->arch.nx_huge_page_recovery_thread) - return -ENOMEM; - return 0; + return call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery); } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) diff --git a/include/linux/call_once.h b/include/linux/call_once.h index 6261aa0b3fb0..13cd6469e7e5 100644 --- a/include/linux/call_once.h +++ b/include/linux/call_once.h @@ -26,20 +26,41 @@ do { \ __once_init((once), #once, &__key); \ } while (0) -static inline void call_once(struct once *once, void (*cb)(struct once *)) +/* + * call_once - Ensure a function has been called exactly once + * + * @once: Tracking struct + * @cb: Function to be called + * + * If @once has never completed successfully before, call @cb and, if + * it returns a zero or positive value, mark @once as completed. Return + * the value returned by @cb + * + * If @once has completed succesfully before, return 0. + * + * The call to @cb is implicitly surrounded by a mutex, though for + * efficiency the * function avoids taking it after the first call. + */ +static inline int call_once(struct once *once, int (*cb)(struct once *)) { - /* Pairs with atomic_set_release() below. */ - if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) - return; - - guard(mutex)(&once->lock); - WARN_ON(atomic_read(&once->state) == ONCE_RUNNING); - if (atomic_read(&once->state) != ONCE_NOT_STARTED) - return; - - atomic_set(&once->state, ONCE_RUNNING); - cb(once); - atomic_set_release(&once->state, ONCE_COMPLETED); + int r, state; + + /* Pairs with atomic_set_release() below. */ + if (atomic_read_acquire(&once->state) == ONCE_COMPLETED) + return 0; + + guard(mutex)(&once->lock); + state = atomic_read(&once->state); + if (unlikely(state != ONCE_NOT_STARTED)) + return WARN_ON_ONCE(state != ONCE_COMPLETED) ? -EINVAL : 0; + + atomic_set(&once->state, ONCE_RUNNING); + r = cb(once); + if (r < 0) + atomic_set(&once->state, ONCE_NOT_STARTED); + else + atomic_set_release(&once->state, ONCE_COMPLETED); + return r; } #endif /* _LINUX_CALL_ONCE_H */ -- 2.51.0 From a2f925a2f62254119cdaa360cfc9c0424bccd531 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 28 Feb 2025 13:26:04 +0100 Subject: [PATCH 10/16] Revert "ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives" This reverts commit cc77e2ce187d26cc66af3577bf896d7410eb25ab. It was reported that adding ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives breaks entering lower package states for certain systems. It turns out that Samsung SSD 870 QVO actually has working LPM when using a recent SSD firmware version. The author of commit cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") reported himself that only older SSD firmware versions have broken LPM: https://lore.kernel.org/stable/93c10d38-718c-459d-84a5-4d87680b4da7@debian.org/ Unfortunately, he did not specify which older firmware version he was using which had broken LPM. Let's revert this quirk, which has FW version field specified as NULL (which means that it applies for all Samsung SSD 870 QVO firmware versions) for now. Once the author reports which older firmware version(s) that are broken, we can create a more fine grained quirk, which populates the FW version field accordingly. Fixes: cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219747 Link: https://lore.kernel.org/r/20250228122603.91814-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f218431..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, -- 2.51.0 From 7eb172143d5508b4da468ed59ee857c6e5e01da6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Mar 2025 11:48:20 -0800 Subject: [PATCH 11/16] Linux 6.14-rc5 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 30dab4c8b012..70bdbf2218fc 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 8014097f1466f7e034844770c537b8dc7d98811f Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 27 Feb 2025 16:28:31 +0100 Subject: [PATCH 12/16] gpiolib: remove unneeded WARN_ON() from gpiochip_set_multiple() GPIO drivers are not required to support set_multiple() - the core will fallback to calling set() for each line if it's missing. Remove the offending check from gpiochip_set_multiple(). Fixes: 98ce1eb1fd87 ("gpiolib: introduce gpio_chip setters that return values") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/all/ab3e42c0-70fa-48e0-ac93-ecbffef63507@samsung.com/ Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20250227152831.59784-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 212269f2d9a2..ec2217a30db3 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3604,9 +3604,6 @@ static int gpiochip_set_multiple(struct gpio_chip *gc, lockdep_assert_held(&gc->gpiodev->srcu); - if (WARN_ON(unlikely(!gc->set_multiple && !gc->set_multiple_rv))) - return -EOPNOTSUPP; - if (gc->set_multiple_rv) { ret = gc->set_multiple_rv(gc, mask, bits); if (ret > 0) -- 2.51.0 From 6224e7fc1ce75edcd03b56a2e0fd4c1765d5888e Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 27 Feb 2025 09:37:47 +0100 Subject: [PATCH 13/16] gpiolib: deprecate gpio_chip::set and gpio_chip::set_multiple We now have setter callbacks that allow us to indicate success or failure using the integer return value. Deprecate the older callbacks so that no new code is tempted to use them. Link: https://lore.kernel.org/r/20250227083748.22400-1-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- include/linux/gpio/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a2a1b6434321..783897d94be8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -346,8 +346,8 @@ struct gpio_irq_chip { * @get: returns value for signal "offset", 0=low, 1=high, or negative error * @get_multiple: reads values for multiple signals defined by "mask" and * stores them in "bits", returns 0 on success or negative error - * @set: assigns output value for signal "offset" - * @set_multiple: assigns output values for multiple signals defined by "mask" + * @set: **DEPRECATED** - please use set_rv() instead + * @set_multiple: **DEPRECATED** - please use set_multiple_rv() instead * @set_rv: assigns output value for signal "offset", returns 0 on success or * negative error value * @set_multiple_rv: assigns output values for multiple signals defined by -- 2.51.0 From 9778568dede2166c7bd124d473f9ec365f782935 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 27 Feb 2025 09:37:48 +0100 Subject: [PATCH 14/16] gpiolib: update kerneldocs for value setters Value setters now return int and can indicate failure. Update the kerneldocs. Link: https://lore.kernel.org/r/20250227083748.22400-2-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index ec2217a30db3..8724c7d8459e 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3761,6 +3761,9 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep, * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. + * + * Returns: + * 0 on success, negative error number on failure. */ int gpiod_set_raw_value(struct gpio_desc *desc, int value) { @@ -3779,6 +3782,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value); * This sets the value of a GPIO line backing a descriptor, applying * different semantic quirks like active low and open drain/source * handling. + * + * Returns: + * 0 on success, negative error number on failure. */ static int gpiod_set_value_nocheck(struct gpio_desc *desc, int value) { @@ -3803,6 +3809,9 @@ static int gpiod_set_value_nocheck(struct gpio_desc *desc, int value) * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. + * + * Returns: + * 0 on success, negative error number on failure. */ int gpiod_set_value(struct gpio_desc *desc, int value) { @@ -4227,6 +4236,9 @@ EXPORT_SYMBOL_GPL(gpiod_get_array_value_cansleep); * regard for its ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. + * + * Returns: + * 0 on success, negative error number on failure. */ int gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { @@ -4245,6 +4257,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_value_cansleep); * account * * This function is to be called from contexts that can sleep. + * + * Returns: + * 0 on success, negative error number on failure. */ int gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { -- 2.51.0 From 732457dc46d62f1df4b2b48c6dbf22b4332da14b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Feb 2025 20:40:33 +0100 Subject: [PATCH 15/16] gpiolib: of: Use local variables Instead of modifying the contents of the array of values read in from a phandle, use local variables to store the values. This makes the code easier to read and the array immutable. Reviewed-by: Alex Elder Tested-by: Yixun Lan Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20250225-gpio-ranges-fourcell-v3-1-860382ba4713@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 2e537ee979f3..86405218f4e2 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -1057,6 +1057,9 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) const char *name; static const char group_names_propname[] = "gpio-ranges-group-names"; bool has_group_names; + int offset; /* Offset of the first GPIO line on the chip */ + int pin; /* Pin base number in the range */ + int count; /* Number of pins/GPIO lines to map */ np = dev_of_node(&chip->gpiodev->dev); if (!np) @@ -1075,13 +1078,17 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) if (!pctldev) return -EPROBE_DEFER; + offset = pinspec.args[0]; + pin = pinspec.args[1]; + count = pinspec.args[2]; + /* Ignore ranges outside of this GPIO chip */ - if (pinspec.args[0] >= (chip->offset + chip->ngpio)) + if (offset >= (chip->offset + chip->ngpio)) continue; - if (pinspec.args[0] + pinspec.args[2] <= chip->offset) + if (offset + count <= chip->offset) continue; - if (pinspec.args[2]) { + if (count) { /* npins != 0: linear range */ if (has_group_names) { of_property_read_string_index(np, @@ -1095,27 +1102,27 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) } /* Trim the range to fit this GPIO chip */ - if (chip->offset > pinspec.args[0]) { - trim = chip->offset - pinspec.args[0]; - pinspec.args[2] -= trim; - pinspec.args[1] += trim; - pinspec.args[0] = 0; + if (chip->offset > offset) { + trim = chip->offset - offset; + count -= trim; + pin += trim; + offset = 0; } else { - pinspec.args[0] -= chip->offset; + offset -= chip->offset; } - if ((pinspec.args[0] + pinspec.args[2]) > chip->ngpio) - pinspec.args[2] = chip->ngpio - pinspec.args[0]; + if ((offset + count) > chip->ngpio) + count = chip->ngpio - offset; ret = gpiochip_add_pin_range(chip, pinctrl_dev_get_devname(pctldev), - pinspec.args[0], - pinspec.args[1], - pinspec.args[2]); + offset, + pin, + count); if (ret) return ret; } else { /* npins == 0: special range */ - if (pinspec.args[1]) { + if (pin) { pr_err("%pOF: Illegal gpio-range format.\n", np); break; @@ -1140,7 +1147,7 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) } ret = gpiochip_add_pingroup_range(chip, pctldev, - pinspec.args[0], name); + offset, name); if (ret) return ret; } -- 2.51.0 From bd3ce71078bde4ecbfc60d49c96d1c55de0635cc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 25 Feb 2025 20:40:34 +0100 Subject: [PATCH 16/16] gpiolib: of: Handle threecell GPIO chips When describing GPIO controllers in the device tree, the ambition of device tree to describe the hardware may require a three-cell scheme: gpios = <&gpio instance offset flags>; This implements support for this scheme in the gpiolib OF core. Drivers that want to handle multiple gpiochip instances from one OF node need to implement a callback similar to this to determine if a certain gpio chip is a pointer to the right instance (pseudo-code): struct my_gpio { struct gpio_chip gcs[MAX_CHIPS]; }; static bool my_of_node_instance_match(struct gpio_chip *gc unsigned int instance) { struct my_gpio *mg = gpiochip_get_data(gc); if (instance >= MAX_CHIPS) return false; return (gc == &mg->gcs[instance]); } probe() { struct my_gpio *mg; struct gpio_chip *gc; int i, ret; for (i = 0; i++; i < MAX_CHIPS) { gc = &mg->gcs[i]; /* This tells gpiolib we have several instances per node */ gc->of_gpio_n_cells = 3; gc->of_node_instance_match = my_of_node_instance_match; gc->base = -1; ... ret = devm_gpiochip_add_data(dev, gc, mg); if (ret) return ret; } } Rename the "simple" of_xlate function to "twocell" which is closer to what it actually does. In the device tree bindings, the provide node needs to specify #gpio-cells = <3>; where the first cell is the instance number: gpios = <&gpio instance offset flags>; Conversely ranges need to have four cells: gpio-ranges = <&pinctrl instance gpio_offset pin_offset count>; Reviewed-by: Alex Elder Tested-by: Yixun Lan Signed-off-by: Linus Walleij Reviewed-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20250225-gpio-ranges-fourcell-v3-2-860382ba4713@linaro.org Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib-of.c | 95 ++++++++++++++++++++++++++++++++----- include/linux/gpio/driver.h | 24 +++++++++- 2 files changed, 107 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 86405218f4e2..6e0eb67dcbf0 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -929,7 +929,7 @@ struct notifier_block gpio_of_notifier = { #endif /* CONFIG_OF_DYNAMIC */ /** - * of_gpio_simple_xlate - translate gpiospec to the GPIO number and flags + * of_gpio_twocell_xlate - translate twocell gpiospec to the GPIO number and flags * @gc: pointer to the gpio_chip structure * @gpiospec: GPIO specifier as found in the device tree * @flags: a flags pointer to fill in @@ -941,9 +941,9 @@ struct notifier_block gpio_of_notifier = { * Returns: * GPIO number (>= 0) on success, negative errno on failure. */ -static int of_gpio_simple_xlate(struct gpio_chip *gc, - const struct of_phandle_args *gpiospec, - u32 *flags) +static int of_gpio_twocell_xlate(struct gpio_chip *gc, + const struct of_phandle_args *gpiospec, + u32 *flags) { /* * We're discouraging gpio_cells < 2, since that way you'll have to @@ -951,7 +951,7 @@ static int of_gpio_simple_xlate(struct gpio_chip *gc, * number and the flags from a single gpio cell -- this is possible, * but not recommended). */ - if (gc->of_gpio_n_cells < 2) { + if (gc->of_gpio_n_cells != 2) { WARN_ON(1); return -EINVAL; } @@ -968,6 +968,49 @@ static int of_gpio_simple_xlate(struct gpio_chip *gc, return gpiospec->args[0]; } +/** + * of_gpio_threecell_xlate - translate threecell gpiospec to the GPIO number and flags + * @gc: pointer to the gpio_chip structure + * @gpiospec: GPIO specifier as found in the device tree + * @flags: a flags pointer to fill in + * + * This is simple translation function, suitable for the most 1:n mapped + * GPIO chips, i.e. several GPIO chip instances from one device tree node. + * In this case the following binding is implied: + * + * foo-gpios = <&gpio instance offset flags>; + * + * Returns: + * GPIO number (>= 0) on success, negative errno on failure. + */ +static int of_gpio_threecell_xlate(struct gpio_chip *gc, + const struct of_phandle_args *gpiospec, + u32 *flags) +{ + if (gc->of_gpio_n_cells != 3) { + WARN_ON(1); + return -EINVAL; + } + + if (WARN_ON(gpiospec->args_count != 3)) + return -EINVAL; + + /* + * Check chip instance number, the driver responds with true if + * this is the chip we are looking for. + */ + if (!gc->of_node_instance_match(gc, gpiospec->args[0])) + return -EINVAL; + + if (gpiospec->args[1] >= gc->ngpio) + return -EINVAL; + + if (flags) + *flags = gpiospec->args[2]; + + return gpiospec->args[1]; +} + #if IS_ENABLED(CONFIG_OF_GPIO_MM_GPIOCHIP) #include /** @@ -1068,7 +1111,15 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) has_group_names = of_property_present(np, group_names_propname); for (;; index++) { - ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, + /* + * Ordinary phandles contain 2-3 cells: + * gpios = <&gpio [instance] offset flags>; + * Ranges always contain one more cell: + * gpio-ranges <&pinctrl [gpio_instance] gpio_offet pin_offet count>; + * This is why we parse chip->of_gpio_n_cells + 1 cells + */ + ret = of_parse_phandle_with_fixed_args(np, "gpio-ranges", + chip->of_gpio_n_cells + 1, index, &pinspec); if (ret) break; @@ -1078,9 +1129,25 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) if (!pctldev) return -EPROBE_DEFER; - offset = pinspec.args[0]; - pin = pinspec.args[1]; - count = pinspec.args[2]; + if (chip->of_gpio_n_cells == 3) { + /* First cell is the gpiochip instance number */ + offset = pinspec.args[1]; + pin = pinspec.args[2]; + count = pinspec.args[3]; + } else { + offset = pinspec.args[0]; + pin = pinspec.args[1]; + count = pinspec.args[2]; + } + + /* + * With multiple GPIO chips per node, check that this chip is the + * right instance. + */ + if (chip->of_node_instance_match && + (chip->of_gpio_n_cells == 3) && + !chip->of_node_instance_match(chip, pinspec.args[0])) + continue; /* Ignore ranges outside of this GPIO chip */ if (offset >= (chip->offset + chip->ngpio)) @@ -1170,8 +1237,14 @@ int of_gpiochip_add(struct gpio_chip *chip) return 0; if (!chip->of_xlate) { - chip->of_gpio_n_cells = 2; - chip->of_xlate = of_gpio_simple_xlate; + if (chip->of_gpio_n_cells == 3) { + if (!chip->of_node_instance_match) + return -EINVAL; + chip->of_xlate = of_gpio_threecell_xlate; + } else { + chip->of_gpio_n_cells = 2; + chip->of_xlate = of_gpio_twocell_xlate; + } } if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS) diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 783897d94be8..83e0a7e86962 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -531,10 +531,32 @@ struct gpio_chip { /** * @of_gpio_n_cells: * - * Number of cells used to form the GPIO specifier. + * Number of cells used to form the GPIO specifier. The standard is 2 + * cells: + * + * gpios = <&gpio offset flags>; + * + * some complex GPIO controllers instantiate more than one chip per + * device tree node and have 3 cells: + * + * gpios = <&gpio instance offset flags>; + * + * Legacy GPIO controllers may even have 1 cell: + * + * gpios = <&gpio offset>; */ unsigned int of_gpio_n_cells; + /** + * of_node_instance_match: + * + * Determine if a chip is the right instance. Must be implemented by + * any driver using more than one gpio_chip per device tree node. + * Returns true if gc is the instance indicated by i (which is the + * first cell in the phandles for GPIO lines and gpio-ranges). + */ + bool (*of_node_instance_match)(struct gpio_chip *gc, unsigned int i); + /** * @of_xlate: * -- 2.51.0