From e16f7eee7c80f9ec7084e06efbe2398586dbf38e Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 3 Oct 2024 14:55:54 -0700 Subject: [PATCH 01/16] platform/x86/intel: power-domains: Add Diamond Rapids support Add Diamond Rapids (INTEL_PANTHERCOVE_X) to tpmi_cpu_ids to support domaid id mappings. Signed-off-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20241003215554.3013807-3-srinivas.pandruvada@linux.intel.com Signed-off-by: Hans de Goede --- drivers/platform/x86/intel/tpmi_power_domains.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/intel/tpmi_power_domains.c b/drivers/platform/x86/intel/tpmi_power_domains.c index 4eb02553957c..0609a8320f7e 100644 --- a/drivers/platform/x86/intel/tpmi_power_domains.c +++ b/drivers/platform/x86/intel/tpmi_power_domains.c @@ -82,6 +82,7 @@ static const struct x86_cpu_id tpmi_cpu_ids[] = { X86_MATCH_VFM(INTEL_ATOM_CRESTMONT_X, NULL), X86_MATCH_VFM(INTEL_ATOM_CRESTMONT, NULL), X86_MATCH_VFM(INTEL_GRANITERAPIDS_D, NULL), + X86_MATCH_VFM(INTEL_PANTHERCOVE_X, NULL), {} }; MODULE_DEVICE_TABLE(x86cpu, tpmi_cpu_ids); -- 2.51.0 From a561509b4187a8908eb7fbb2d1bf35bbc20ec74b Mon Sep 17 00:00:00 2001 From: Crag Wang Date: Fri, 4 Oct 2024 23:27:58 +0800 Subject: [PATCH 02/16] platform/x86: dell-sysman: add support for alienware products Alienware supports firmware-attributes and has its own OEM string. Signed-off-by: Crag Wang Link: https://lore.kernel.org/r/20241004152826.93992-1-crag_wang@dell.com Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede --- drivers/platform/x86/dell/dell-wmi-sysman/sysman.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c index 9def7983d7d6..40ddc6eb7562 100644 --- a/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell/dell-wmi-sysman/sysman.c @@ -521,6 +521,7 @@ static int __init sysman_init(void) int ret = 0; if (!dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Dell System", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "Alienware", NULL) && !dmi_find_device(DMI_DEV_TYPE_OEM_STRING, "www.dell.com", NULL)) { pr_err("Unable to run on non-Dell system\n"); return -ENODEV; -- 2.51.0 From 7b954b9ba007d03ba26135ac49b2c93208cf090e Mon Sep 17 00:00:00 2001 From: Anaswara T Rajan Date: Sat, 5 Oct 2024 12:30:56 +0530 Subject: [PATCH 03/16] platform/x86: dell-ddv: Fix typo in documentation Fix typo in word 'diagnostics' in documentation. Signed-off-by: Anaswara T Rajan Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20241005070056.16326-1-anaswaratrajan@gmail.com Signed-off-by: Hans de Goede --- Documentation/wmi/devices/dell-wmi-ddv.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/wmi/devices/dell-wmi-ddv.rst b/Documentation/wmi/devices/dell-wmi-ddv.rst index 2fcdfcf03327..e0c20af30948 100644 --- a/Documentation/wmi/devices/dell-wmi-ddv.rst +++ b/Documentation/wmi/devices/dell-wmi-ddv.rst @@ -8,7 +8,7 @@ Introduction ============ Many Dell notebooks made after ~2020 support a WMI-based interface for -retrieving various system data like battery temperature, ePPID, diagostic data +retrieving various system data like battery temperature, ePPID, diagnostic data and fan/thermal sensor data. This interface is likely used by the `Dell Data Vault` software on Windows, @@ -277,7 +277,7 @@ Reverse-Engineering the DDV WMI interface 4. Try to deduce the meaning of a certain WMI method by comparing the control flow with other ACPI methods (_BIX or _BIF for battery related methods for example). -5. Use the built-in UEFI diagostics to view sensor types/values for fan/thermal +5. Use the built-in UEFI diagnostics to view sensor types/values for fan/thermal related methods (sometimes overwriting static ACPI data fields can be used to test different sensor type values, since on some machines this data is not reinitialized upon a warm reset). -- 2.51.0 From 5984b40f5bcd41bfd08359cdb9c8cb7ca9d3cc60 Mon Sep 17 00:00:00 2001 From: Armin Wolf Date: Sat, 5 Oct 2024 23:38:24 +0200 Subject: [PATCH 04/16] platform/x86: wmi: Update WMI driver API documentation The WMI driver core now passes the WMI event data to legacy notify handlers, so WMI devices sharing notification IDs are now being handled properly. Fixes: e04e2b760ddb ("platform/x86: wmi: Pass event data directly to legacy notify handlers") Signed-off-by: Armin Wolf Link: https://lore.kernel.org/r/20241005213825.701887-1-W_Armin@gmx.de Signed-off-by: Hans de Goede --- Documentation/driver-api/wmi.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/wmi.rst b/Documentation/driver-api/wmi.rst index 6ca58c8249e5..4e8dbdb1fc67 100644 --- a/Documentation/driver-api/wmi.rst +++ b/Documentation/driver-api/wmi.rst @@ -7,12 +7,11 @@ WMI Driver API The WMI driver core supports a more modern bus-based interface for interacting with WMI devices, and an older GUID-based interface. The latter interface is considered to be deprecated, so new WMI drivers should generally avoid it since -it has some issues with multiple WMI devices and events sharing the same GUIDs -and/or notification IDs. The modern bus-based interface instead maps each -WMI device to a :c:type:`struct wmi_device `, so it supports -WMI devices sharing GUIDs and/or notification IDs. Drivers can then register -a :c:type:`struct wmi_driver `, which will be bound to compatible -WMI devices by the driver core. +it has some issues with multiple WMI devices sharing the same GUID. +The modern bus-based interface instead maps each WMI device to a +:c:type:`struct wmi_device `, so it supports WMI devices sharing the +same GUID. Drivers can then register a :c:type:`struct wmi_driver ` +which will be bound to compatible WMI devices by the driver core. .. kernel-doc:: include/linux/wmi.h :internal: -- 2.51.0 From 2fae3129c0c08e72b1fe93e61fd8fd203252094a Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 5 Oct 2024 15:05:45 +0200 Subject: [PATCH 05/16] platform/x86: x86-android-tablets: Fix use after free on platform_device_register() errors x86_android_tablet_remove() frees the pdevs[] array, so it should not be used after calling x86_android_tablet_remove(). When platform_device_register() fails, store the pdevs[x] PTR_ERR() value into the local ret variable before calling x86_android_tablet_remove() to avoid using pdevs[] after it has been freed. Fixes: 5eba0141206e ("platform/x86: x86-android-tablets: Add support for instantiating platform-devs") Fixes: e2200d3f26da ("platform/x86: x86-android-tablets: Add gpio_keys support to x86_android_tablet_init()") Cc: stable@vger.kernel.org Reported-by: Aleksandr Burakov Closes: https://lore.kernel.org/platform-driver-x86/20240917120458.7300-1-a.burakov@rosalinux.ru/ Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20241005130545.64136-1-hdegoede@redhat.com --- drivers/platform/x86/x86-android-tablets/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/x86-android-tablets/core.c b/drivers/platform/x86/x86-android-tablets/core.c index 1427a9a39008..ef572b90e06b 100644 --- a/drivers/platform/x86/x86-android-tablets/core.c +++ b/drivers/platform/x86/x86-android-tablets/core.c @@ -390,8 +390,9 @@ static __init int x86_android_tablet_probe(struct platform_device *pdev) for (i = 0; i < pdev_count; i++) { pdevs[i] = platform_device_register_full(&dev_info->pdev_info[i]); if (IS_ERR(pdevs[i])) { + ret = PTR_ERR(pdevs[i]); x86_android_tablet_remove(pdev); - return PTR_ERR(pdevs[i]); + return ret; } } @@ -443,8 +444,9 @@ static __init int x86_android_tablet_probe(struct platform_device *pdev) PLATFORM_DEVID_AUTO, &pdata, sizeof(pdata)); if (IS_ERR(pdevs[pdev_count])) { + ret = PTR_ERR(pdevs[pdev_count]); x86_android_tablet_remove(pdev); - return PTR_ERR(pdevs[pdev_count]); + return ret; } pdev_count++; } -- 2.51.0 From c14a30468230c608731f36569bfd9785bb486131 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Mon, 23 Sep 2024 18:18:47 +0000 Subject: [PATCH 06/16] scripts: import more list macros Import list_is_first, list_is_last, list_replace, and list_replace_init. Signed-off-by: Sami Tolvanen Signed-off-by: Masahiro Yamada --- scripts/include/list.h | 50 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/scripts/include/list.h b/scripts/include/list.h index fea1e2b79063..8bdcaadca709 100644 --- a/scripts/include/list.h +++ b/scripts/include/list.h @@ -127,6 +127,36 @@ static inline void list_del(struct list_head *entry) entry->prev = LIST_POISON2; } +/** + * list_replace - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * If @old was empty, it will be overwritten. + */ +static inline void list_replace(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +/** + * list_replace_init - replace old entry by new one and initialize the old one + * @old : the element to be replaced + * @new : the new element to insert + * + * If @old was empty, it will be overwritten. + */ +static inline void list_replace_init(struct list_head *old, + struct list_head *new) +{ + list_replace(old, new); + INIT_LIST_HEAD(old); +} + /** * list_move - delete from one list and add as another's head * @list: the entry to move @@ -150,6 +180,26 @@ static inline void list_move_tail(struct list_head *list, list_add_tail(list, head); } +/** + * list_is_first -- tests whether @list is the first entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_first(const struct list_head *list, const struct list_head *head) +{ + return list->prev == head; +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, const struct list_head *head) +{ + return list->next == head; +} + /** * list_is_head - tests whether @list is the list @head * @list: the entry to test -- 2.51.0 From d939881a15b13c028257471d8853d12d83686bcc Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Wed, 25 Sep 2024 13:32:30 +0800 Subject: [PATCH 07/16] kbuild: fix a typo dt_binding_schema -> dt_binding_schemas If we follow "make help" to "make dt_binding_schema", we will see below error: $ make dt_binding_schema make[1]: *** No rule to make target 'dt_binding_schema'. Stop. make: *** [Makefile:224: __sub-make] Error 2 It should be a typo. So this will fix it. Fixes: 604a57ba9781 ("dt-bindings: kbuild: Add separate target/dependency for processed-schema.json") Signed-off-by: Xu Yang Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 187a4ce2728e..7bb521fae64a 100644 --- a/Makefile +++ b/Makefile @@ -1645,7 +1645,7 @@ help: echo '* dtbs - Build device tree blobs for enabled boards'; \ echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'; \ echo ' dt_binding_check - Validate device tree binding documents and examples'; \ - echo ' dt_binding_schema - Build processed device tree binding schemas'; \ + echo ' dt_binding_schemas - Build processed device tree binding schemas'; \ echo ' dtbs_check - Validate device tree source files';\ echo '') -- 2.51.0 From 82cb44308951ad4ce7a8500b9e025d27d7fb3526 Mon Sep 17 00:00:00 2001 From: Aaron Thompson Date: Fri, 4 Oct 2024 07:52:45 +0000 Subject: [PATCH 08/16] kbuild: deb-pkg: Remove blank first line from maint scripts The blank line causes execve() to fail: # strace ./postinst execve("./postinst", ...) = -1 ENOEXEC (Exec format error) strace: exec: Exec format error +++ exited with 1 +++ However running the scripts via shell does work (at least with bash) because the shell attempts to execute the file as a shell script when execve() fails. Fixes: b611daae5efc ("kbuild: deb-pkg: split image and debug objects staging out into functions") Signed-off-by: Aaron Thompson Reviewed-by: Nathan Chancellor Reviewed-by: Nicolas Schier Signed-off-by: Masahiro Yamada --- scripts/package/builddeb | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/package/builddeb b/scripts/package/builddeb index c1757db6aa8a..404587fc71fe 100755 --- a/scripts/package/builddeb +++ b/scripts/package/builddeb @@ -74,7 +74,6 @@ install_linux_image () { mkdir -p "${pdir}/DEBIAN" cat <<-EOF > "${pdir}/DEBIAN/${script}" - #!/bin/sh set -e -- 2.51.0 From 8cf0b93919e13d1e8d4466eb4080a4c4d9d66d7b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Oct 2024 15:32:27 -0700 Subject: [PATCH 09/16] Linux 6.12-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 7bb521fae64a..c5493c0c0ca1 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 12 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From f7345ccc62a4b880cf76458db5f320725f28e400 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 10 Oct 2024 18:36:09 +0200 Subject: [PATCH 10/16] rcu/nocb: Fix rcuog wake-up from offline softirq After a CPU has set itself offline and before it eventually calls rcutree_report_cpu_dead(), there are still opportunities for callbacks to be enqueued, for example from a softirq. When that happens on NOCB, the rcuog wake-up is deferred through an IPI to an online CPU in order not to call into the scheduler and risk arming the RT-bandwidth after hrtimers have been migrated out and disabled. But performing a synchronized IPI from a softirq is buggy as reported in the following scenario: WARNING: CPU: 1 PID: 26 at kernel/smp.c:633 smp_call_function_single Modules linked in: rcutorture torture CPU: 1 UID: 0 PID: 26 Comm: migration/1 Not tainted 6.11.0-rc1-00012-g9139f93209d1 #1 Stopper: multi_cpu_stop+0x0/0x320 <- __stop_cpus+0xd0/0x120 RIP: 0010:smp_call_function_single swake_up_one_online __call_rcu_nocb_wake __call_rcu_common ? rcu_torture_one_read call_timer_fn __run_timers run_timer_softirq handle_softirqs irq_exit_rcu ? tick_handle_periodic sysvec_apic_timer_interrupt Fix this with forcing deferred rcuog wake up through the NOCB timer when the CPU is offline. The actual wake up will happen from rcutree_report_cpu_dead(). Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202409231644.4c55582d-lkp@intel.com Fixes: 9139f93209d1 ("rcu/nocb: Fix RT throttling hrtimer armed from offline CPU") Reviewed-by: "Joel Fernandes (Google)" Signed-off-by: Frederic Weisbecker Signed-off-by: Neeraj Upadhyay --- kernel/rcu/tree_nocb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 97b99cd06923..16865475120b 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -554,13 +554,19 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, TPS("WakeLazy")); - } else if (!irqs_disabled_flags(flags)) { + } else if (!irqs_disabled_flags(flags) && cpu_online(rdp->cpu)) { /* ... if queue was empty ... */ rcu_nocb_unlock(rdp); wake_nocb_gp(rdp, false); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeEmpty")); } else { + /* + * Don't do the wake-up upfront on fragile paths. + * Also offline CPUs can't call swake_up_one_online() from + * (soft-)IRQs. Rely on the final deferred wake-up from + * rcutree_report_cpu_dead() + */ rcu_nocb_unlock(rdp); wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, TPS("WakeEmptyIsDeferred")); -- 2.51.0 From 32693634cdf90e56bd167e5226db7ca569707437 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:02:50 -0700 Subject: [PATCH 11/16] torture: Add --no-affinity parameter to kvm.sh In performance tests, it can be counter-productive to spread torture-test guest OSes across sockets. Plus the experimenter might have ideas about what CPUs individual guest OSes are to run on. This commit therefore adds a --no-affinity parameter to kvm.sh to prevent it from running taskset on its guest OSes. Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- .../rcutorture/bin/kvm-test-1-run-batch.sh | 43 ++++++++++--------- tools/testing/selftests/rcutorture/bin/kvm.sh | 6 +++ 2 files changed, 29 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index c3808c490d92..f87046b702d8 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -56,27 +56,30 @@ do echo > $i/kvm-test-1-run-qemu.sh.out export TORTURE_AFFINITY= kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate - cat << ' ___EOF___' >> $T/cpubatches.awk - END { - affinitylist = ""; - if (!gotcpus()) { - print "echo No CPU-affinity information, so no taskset command."; - } else if (cpu_count !~ /^[0-9][0-9]*$/) { - print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; - } else { - affinitylist = nextcpus(cpu_count); - if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) - print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; - else if (!dumpcpustate()) - print "echo " scenario ": Could not dump state, so no taskset command."; - else - print "export TORTURE_AFFINITY=" affinitylist; + if test -z "${TORTURE_NO_AFFINITY}" + then + cat << ' ___EOF___' >> $T/cpubatches.awk + END { + affinitylist = ""; + if (!gotcpus()) { + print "echo No CPU-affinity information, so no taskset command."; + } else if (cpu_count !~ /^[0-9][0-9]*$/) { + print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; + } else { + affinitylist = nextcpus(cpu_count); + if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) + print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; + else if (!dumpcpustate()) + print "echo " scenario ": Could not dump state, so no taskset command."; + else + print "export TORTURE_AFFINITY=" affinitylist; + } } - } - ___EOF___ - cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" - affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" - $affinity_export + ___EOF___ + cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" + affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" + $affinity_export + fi kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & done for i in $runfiles diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 7af73ddc148d..42e5e8597a1a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -42,6 +42,7 @@ TORTURE_JITTER_STOP="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" +TORTURE_NO_AFFINITY="" TORTURE_QEMU_MEM=512 torture_qemu_mem_default=1 TORTURE_REMOTE= @@ -82,6 +83,7 @@ usage () { echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" + echo " --no-affinity" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." @@ -220,6 +222,9 @@ do torture_qemu_mem_default= shift ;; + --no-affinity) + TORTURE_NO_AFFINITY="no-affinity" + ;; --no-initrd) TORTURE_INITRD=""; export TORTURE_INITRD ;; @@ -417,6 +422,7 @@ TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_K TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD +TORTURE_NO_AFFINITY="$TORTURE_NO_AFFINITY"; export TORTURE_NO_AFFINITY TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC -- 2.51.0 From 046c06f5ba97b31da189396e922ebff3f502518e Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:02:51 -0700 Subject: [PATCH 12/16] refscale: Correct affinity check The current affinity check works fine until there are more reader processes than CPUs, at which point the affinity check is looking for non-existent CPUs. This commit therefore applies the same modulus to the check as is present in the set_cpus_allowed_ptr() call. Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- kernel/rcu/refscale.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 0db9db73f57f..25910ebe95c0 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -829,7 +829,7 @@ repeat: goto end; // Make sure that the CPU is affinitized appropriately during testing. - WARN_ON_ONCE(raw_smp_processor_id() != me); + WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids); WRITE_ONCE(rt->start_reader, 0); if (!atomic_dec_return(&n_started)) -- 2.51.0 From ff9ba8db87222be8d344f7c1cc3c28b1e22f6429 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:02:52 -0700 Subject: [PATCH 13/16] rcuscale: Add guest_os_delay module parameter This commit adds a guest_os_delay module parameter that extends warm-up and cool-down the specified number of seconds before and after the series of test runs. This allows the data-collection intervals from any given rcuscale guest OSes to line up with active periods in the other rcuscale guest OSes, and also allows the thermal warm-up period required to obtain consistent results from one test to the next. Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- kernel/rcu/refscale.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/rcu/refscale.c b/kernel/rcu/refscale.c index 25910ebe95c0..c8374760e003 100644 --- a/kernel/rcu/refscale.c +++ b/kernel/rcu/refscale.c @@ -75,6 +75,9 @@ MODULE_PARM_DESC(scale_type, "Type of test (rcu, srcu, refcnt, rwsem, rwlock."); torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); torture_param(int, verbose_batched, 0, "Batch verbose debugging printk()s"); +// Number of seconds to extend warm-up and cool-down for multiple guest OSes +torture_param(long, guest_os_delay, 0, + "Number of seconds to extend warm-up/cool-down for multiple guest OSes."); // Wait until there are multiple CPUs before starting test. torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, "Holdoff time before test start (s)"); @@ -801,6 +804,18 @@ static void rcu_scale_one_reader(void) cur_ops->delaysection(loops, readdelay / 1000, readdelay % 1000); } +// Warm up cache, or, if needed run a series of rcu_scale_one_reader() +// to allow multiple rcuscale guest OSes to collect mutually valid data. +static void rcu_scale_warm_cool(void) +{ + unsigned long jdone = jiffies + (guest_os_delay > 0 ? guest_os_delay * HZ : -1); + + do { + rcu_scale_one_reader(); + cond_resched(); + } while (time_before(jiffies, jdone)); +} + // Reader kthread. Repeatedly does empty RCU read-side // critical section, minimizing update-side interference. static int @@ -957,6 +972,7 @@ static int main_func(void *arg) schedule_timeout_uninterruptible(1); // Start exp readers up per experiment + rcu_scale_warm_cool(); for (exp = 0; exp < nruns && !torture_must_stop(); exp++) { if (torture_must_stop()) goto end; @@ -987,6 +1003,7 @@ static int main_func(void *arg) result_avg[exp] = div_u64(1000 * process_durations(nreaders), nreaders * loops); } + rcu_scale_warm_cool(); // Print the average of all experiments SCALEOUT("END OF TEST. Calculating average duration per loop (nanoseconds)...\n"); -- 2.51.0 From 80e935c8c154d8fbdd85a20d89b4962662ceddd7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 11:02:53 -0700 Subject: [PATCH 14/16] rcutorture: Avoid printing cpu=-1 for no-fault RCU boost failure If a CPU runs throughout the stalled grace period without passing through a quiescent state, RCU priority boosting cannot help. The rcu_torture_boost_failed() function therefore prints a message flagging the first such CPU. However, if the stall was instead due to (for example) RCU's grace-period kthread being starved of CPU, there will be no such CPU, causing rcu_check_boost_fail() to instead pass back -1 through its cpup CPU-pointer parameter. Therefore, the current message complains about a mythical CPU -1. This commit therefore checks for this situation, and notes that all CPUs have passed through a quiescent state. Signed-off-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcutorture.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index bb75dbf5c800..e92fa97fc76f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1059,8 +1059,13 @@ static bool rcu_torture_boost_failed(unsigned long gp_state, unsigned long *star // At most one persisted message per boost test. j = jiffies; lp = READ_ONCE(last_persist); - if (time_after(j, lp + mininterval) && cmpxchg(&last_persist, lp, j) == lp) - pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu); + if (time_after(j, lp + mininterval) && + cmpxchg(&last_persist, lp, j) == lp) { + if (cpu < 0) + pr_info("Boost inversion persisted: QS from all CPUs\n"); + else + pr_info("Boost inversion persisted: No QS from CPU %d\n", cpu); + } return false; // passed on a technicality } VERBOSE_TOROUT_STRING("rcu_torture_boost boosting failed"); -- 2.51.0 From 812a1c3b9f7c36d9255f0d29d0a3d324e2f52321 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 13 Nov 2024 12:00:08 +0100 Subject: [PATCH 15/16] rcuscale: Do a proper cleanup if kfree_scale_init() fails A static analyzer for C, Smatch, reports and triggers below warnings: kernel/rcu/rcuscale.c:1215 rcu_scale_init() warn: inconsistent returns 'global &fullstop_mutex'. The checker complains about, we do not unlock the "fullstop_mutex" mutex, in case of hitting below error path: ... if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); WARN_ON_ONCE(1); return -1; ^^^^^^^^^^ ... it happens because "-1" is returned right away instead of doing a proper unwinding. Fix it by jumping to "unwind" label instead of returning -1. Reported-by: Dan Carpenter Reviewed-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Closes: https://lore.kernel.org/rcu/ZxfTrHuEGtgnOYWp@pc636/T/ Fixes: 084e04fff160 ("rcuscale: Add laziness and kfree tests") Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcuscale.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index 6d37596deb1f..d360fa44b234 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -890,13 +890,15 @@ kfree_scale_init(void) if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); WARN_ON_ONCE(1); - return -1; + firsterr = -1; + goto unwind; } if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) { pr_alert("ERROR: call_rcu() CBs are being too lazy!\n"); WARN_ON_ONCE(1); - return -1; + firsterr = -1; + goto unwind; } } -- 2.51.0 From c229d579d047f9c4fb4d6c37d9d04b88a398e461 Mon Sep 17 00:00:00 2001 From: "Uladzislau Rezki (Sony)" Date: Wed, 13 Nov 2024 12:00:09 +0100 Subject: [PATCH 16/16] rcuscale: Remove redundant WARN_ON_ONCE() splat There are two places where WARN_ON_ONCE() is called two times in the error paths. One which is encapsulated into if() condition and another one, which is unnecessary, is placed in the brackets. Remove an extra WARN_ON_ONCE() splat which is in brackets. Reviewed-by: Paul E. McKenney Reviewed-by: Neeraj Upadhyay Signed-off-by: Uladzislau Rezki (Sony) Signed-off-by: Frederic Weisbecker --- kernel/rcu/rcuscale.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c index d360fa44b234..0f3059b1b80d 100644 --- a/kernel/rcu/rcuscale.c +++ b/kernel/rcu/rcuscale.c @@ -889,14 +889,12 @@ kfree_scale_init(void) if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); - WARN_ON_ONCE(1); firsterr = -1; goto unwind; } if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start > 3 * HZ)) { pr_alert("ERROR: call_rcu() CBs are being too lazy!\n"); - WARN_ON_ONCE(1); firsterr = -1; goto unwind; } -- 2.51.0