From 994633894f208a0151baaee1688ab3c431912553 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:53:18 -0400 Subject: [PATCH 01/16] tools/power turbostat: re-factor sysfs code Probe cpuidle "sysfs" residency and counts separately, since soon we will make one disabled on, and the other disabled off. Clarify that some BIC (build-in-counters) are actually "groups". since we're about to re-name some of those groups. no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c9a34c16c7a8..df0391bedcde 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -273,10 +273,10 @@ struct msr_counter bic[] = { #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -2354,16 +2354,16 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -10260,7 +10260,7 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; @@ -10304,6 +10304,16 @@ void probe_sysfs(void) if (state < min_state) min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int min_state = 1024, max_state = 0; + char *sp; for (state = 10; state >= 0; --state) { @@ -10602,7 +10612,8 @@ skip_cgroup_setting: print_bootcmd(); } - probe_sysfs(); + probe_cpuidle_residency(); + probe_cpuidle_counts(); if (!getuid()) set_rlimit(); -- 2.51.0 From 6f110a5e4f9977c31ce76fefbfef6fd4eab6bfb7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 10:00:04 -0700 Subject: [PATCH 02/16] Disable SLUB_TINY for build testing ... and don't error out so hard on missing module descriptions. Before commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") we used to warn about missing module descriptions, but only when building with extra warnigns (ie 'W=1'). After that commit the warning became an unconditional hard error. And it turns out not all modules have been converted despite the claims to the contrary. As reported by Damian Tometzki, the slub KUnit test didn't have a module description, and apparently nobody ever really noticed. The reason nobody noticed seems to be that the slub KUnit tests get disabled by SLUB_TINY, which also ends up disabling a lot of other code, both in tests and in slub itself. And so anybody doing full build tests didn't actually see this failre. So let's disable SLUB_TINY for build-only tests, since it clearly ends up limiting build coverage. Also turn the missing module descriptions error back into a warning, but let's keep it around for non-'W=1' builds. Reported-by: Damian Tometzki Link: https://lore.kernel.org/all/01070196099fd059-e8463438-7b1b-4ec8-816d-173874be9966-000000@eu-central-1.amazonses.com/ Cc: Masahiro Yamada Cc: Jeff Johnson Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- scripts/mod/modpost.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index d3fb3762887b..e113f713b493 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -201,7 +201,7 @@ config KVFREE_RCU_BATCHED config SLUB_TINY bool "Configure for minimal memory footprint" - depends on EXPERT + depends on EXPERT && !COMPILE_TEST select SLAB_MERGE_DEFAULT help Configures the slab allocator in a way to achieve minimal memory diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 92627e8d0e16..be89921d60b6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1603,7 +1603,7 @@ static void read_symbols(const char *modname) } if (!get_modinfo(&info, "description")) - error("missing MODULE_DESCRIPTION() in %s\n", modname); + warn("missing MODULE_DESCRIPTION() in %s\n", modname); } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { -- 2.51.0 From ec4acd3166d8a7a03b059d01b9c6f11a658e833f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:29:57 -0400 Subject: [PATCH 03/16] tools/power turbostat: disable "cpuidle" invocation counters, by default Create "pct_idle" counter group, the sofware notion of residency so it can now be singled out, independent of other counter groups. Create "cpuidle" group, the cpuidle invocation counts. Disable "cpuidle", by default. Create "swidle" = "cpuidle" + "pct_idle". Undocument "sysfs", the old name for "swidle", but keep it working for backwards compatibilty. Create "hwidle", all the HW idle counters Modify "idle", enabled by default "idle" = "hwidle" + "pct_idle" (and now excludes "cpuidle") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 12 +++++----- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e86493880c16..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,15 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index df0391bedcde..ab184f95cdaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) +#define BIC_pct_idle (1ULL << 63) #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -2362,6 +2366,15 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { retval |= BIC_GROUP_IDLE; break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; + break; } else if (!strcmp(name_list, "frequency")) { retval |= BIC_GROUP_FREQUENCY; break; @@ -2372,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10269,6 +10283,9 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10291,7 +10308,7 @@ void probe_cpuidle_residency(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) @@ -10315,6 +10332,9 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_cpuidle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10327,7 +10347,7 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) -- 2.51.0 From 03e00e373cab981ad808271b2650700cfa0fbda6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:49:20 -0400 Subject: [PATCH 04/16] tools/power turbostat: v2025.05.06 Support up to 8192 processors Add cpuidle governor debug telemetry, disabled by default Update default output to exclude cpuidle invocation counts Bug fixes Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab184f95cdaf..1b9fdc1a7ee8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9594,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From 0efdedb3358aa78102967f242379686f94315830 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 2 Apr 2025 21:21:57 +0100 Subject: [PATCH 05/16] tools/include: make uapi/linux/types.h usable from assembly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The "real" linux/types.h UAPI header gracefully degrades to a NOOP when included from assembly code. Mirror this behaviour in the tools/ variant. Test for __ASSEMBLER__ over __ASSEMBLY__ as the former is provided by the toolchain automatically. Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/af553c62-ca2f-4956-932c-dd6e3a126f58@sirena.org.uk/ Fixes: c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20250321-uapi-consistency-v1-1-439070118dc0@linutronix.de Signed-off-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Linus Torvalds --- tools/include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ -- 2.51.0 From 0af2f6be1b4281385b618cb86ad946eded089ac8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 13:11:33 -0700 Subject: [PATCH 06/16] Linux 6.15-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e55726a71d95..38689a0c3605 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From cfdb7520f901a9696e1dc825b0a8ed9c664d58aa Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Tue, 18 Mar 2025 09:07:55 +0100 Subject: [PATCH 07/16] PM: hibernate: Remove size arguments when calling strscpy() The size parameter is optional and strscpy() automatically determines the length of the destination buffer using sizeof() if the argument is omitted. This makes the explicit sizeof() calls unnecessary. Remove them to shorten and simplify the code. Signed-off-by: Thorsten Blum Link: https://patch.msgid.link/20250318080755.61126-2-thorsten.blum@linux.dev Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 23c0f4e6cb2f..f0db9d1896e8 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -757,7 +757,7 @@ int hibernate(void) * Query for the compression algorithm support if compression is enabled. */ if (!nocompress) { - strscpy(hib_comp_algo, hibernate_compressor, sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, hibernate_compressor); if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) { pr_err("%s compression is not available\n", hib_comp_algo); return -EOPNOTSUPP; @@ -1006,9 +1006,9 @@ static int software_resume(void) */ if (!(swsusp_header_flags & SF_NOCOMPRESS_MODE)) { if (swsusp_header_flags & SF_COMPRESSION_ALG_LZ4) - strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4, sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, COMPRESSION_ALGO_LZ4); else - strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO, sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, COMPRESSION_ALGO_LZO); if (!crypto_has_acomp(hib_comp_algo, 0, CRYPTO_ALG_ASYNC)) { pr_err("%s compression is not available\n", hib_comp_algo); error = -EOPNOTSUPP; @@ -1456,8 +1456,7 @@ static int hibernate_compressor_param_set(const char *compressor, if (index >= 0) { ret = param_set_copystring(comp_alg_enabled[index], kp); if (!ret) - strscpy(hib_comp_algo, comp_alg_enabled[index], - sizeof(hib_comp_algo)); + strscpy(hib_comp_algo, comp_alg_enabled[index]); } else { ret = index; } -- 2.51.0 From 0cbef962ce1ff344ecfe32d1c874978f1f7d410a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 Mar 2025 13:50:09 +0100 Subject: [PATCH 08/16] PM: sleep: Resume children after resuming the parent According to [1], the handling of device suspend and resume, and particularly the latter, involves unnecessary overhead related to starting new async work items for devices that cannot make progress right away because they have to wait for other devices. To reduce this problem in the resume path, use the observation that starting the async resume of the children of a device after resuming the parent is likely to produce less scheduling and memory management noise than starting it upfront while at the same time it should not increase the resume duration substantially. Accordingly, modify the code to start the async resume of the device's children when the processing of the parent has been completed in each stage of device resume and only start async resume upfront for devices without parents. Also make it check if a given device can be resumed asynchronously before starting the synchronous resume of it in case it will have to wait for another that is already resuming asynchronously. In addition to making the async resume of devices more friendly to systems with relatively less computing resources, this change is also preliminary for analogous changes in the suspend path. On the systems where it has been tested, this change by itself does not affect the overall system resume duration in a measurable way. Link: https://lore.kernel.org/linux-pm/20241114220921.2529905-1-saravanak@google.com/ [1] Suggested-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/22630663.EfDdHjke4D@rjwysocki.net --- drivers/base/power/main.c | 85 ++++++++++++++++++++++++++++++--------- 1 file changed, 66 insertions(+), 19 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index c8b0a9e29ed8..aec774c02ffc 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -63,6 +63,7 @@ static LIST_HEAD(dpm_noirq_list); static DEFINE_MUTEX(dpm_list_mtx); static pm_message_t pm_transition; +static DEFINE_MUTEX(async_wip_mtx); static int async_error; static const char *pm_verb(int event) @@ -597,8 +598,11 @@ static bool is_async(struct device *dev) && !pm_trace_is_enabled(); } -static bool dpm_async_fn(struct device *dev, async_func_t func) +static bool __dpm_async(struct device *dev, async_func_t func) { + if (dev->power.work_in_progress) + return true; + if (!is_async(dev)) return false; @@ -611,14 +615,37 @@ static bool dpm_async_fn(struct device *dev, async_func_t func) put_device(dev); + return false; +} + +static bool dpm_async_fn(struct device *dev, async_func_t func) +{ + guard(mutex)(&async_wip_mtx); + + return __dpm_async(dev, func); +} + +static int dpm_async_with_cleanup(struct device *dev, void *fn) +{ + guard(mutex)(&async_wip_mtx); + + if (!__dpm_async(dev, fn)) + dev->power.work_in_progress = false; + + return 0; +} + +static void dpm_async_resume_children(struct device *dev, async_func_t func) +{ /* - * async_schedule_dev_nocall() above has returned false, so func() is - * not running and it is safe to update power.work_in_progress without - * extra synchronization. + * Start processing "async" children of the device unless it's been + * started already for them. + * + * This could have been done for the device's "async" consumers too, but + * they either need to wait for their parents or the processing has + * already started for them after their parents were processed. */ - dev->power.work_in_progress = false; - - return false; + device_for_each_child(dev, func, dpm_async_with_cleanup); } static void dpm_clear_async_state(struct device *dev) @@ -627,6 +654,13 @@ static void dpm_clear_async_state(struct device *dev) dev->power.work_in_progress = false; } +static bool dpm_root_device(struct device *dev) +{ + return !dev->parent; +} + +static void async_resume_noirq(void *data, async_cookie_t cookie); + /** * device_resume_noirq - Execute a "noirq resume" callback for given device. * @dev: Device to handle. @@ -710,6 +744,8 @@ Out: dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async noirq" : " noirq", error); } + + dpm_async_resume_children(dev, async_resume_noirq); } static void async_resume_noirq(void *data, async_cookie_t cookie) @@ -733,19 +769,20 @@ static void dpm_noirq_resume_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ list_for_each_entry(dev, &dpm_noirq_list, power.entry) { dpm_clear_async_state(dev); - dpm_async_fn(dev, async_resume_noirq); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_noirq); } while (!list_empty(&dpm_noirq_list)) { dev = to_device(dpm_noirq_list.next); list_move_tail(&dev->power.entry, &dpm_late_early_list); - if (!dev->power.work_in_progress) { + if (!dpm_async_fn(dev, async_resume_noirq)) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -781,6 +818,8 @@ void dpm_resume_noirq(pm_message_t state) device_wakeup_disarm_wake_irqs(); } +static void async_resume_early(void *data, async_cookie_t cookie); + /** * device_resume_early - Execute an "early resume" callback for given device. * @dev: Device to handle. @@ -848,6 +887,8 @@ Out: dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async early" : " early", error); } + + dpm_async_resume_children(dev, async_resume_early); } static void async_resume_early(void *data, async_cookie_t cookie) @@ -875,19 +916,20 @@ void dpm_resume_early(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ list_for_each_entry(dev, &dpm_late_early_list, power.entry) { dpm_clear_async_state(dev); - dpm_async_fn(dev, async_resume_early); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume_early); } while (!list_empty(&dpm_late_early_list)) { dev = to_device(dpm_late_early_list.next); list_move_tail(&dev->power.entry, &dpm_suspended_list); - if (!dev->power.work_in_progress) { + if (!dpm_async_fn(dev, async_resume_early)) { get_device(dev); mutex_unlock(&dpm_list_mtx); @@ -919,6 +961,8 @@ void dpm_resume_start(pm_message_t state) } EXPORT_SYMBOL_GPL(dpm_resume_start); +static void async_resume(void *data, async_cookie_t cookie); + /** * device_resume - Execute "resume" callbacks for given device. * @dev: Device to handle. @@ -1018,6 +1062,8 @@ static void device_resume(struct device *dev, pm_message_t state, bool async) dpm_save_failed_dev(dev_name(dev)); pm_dev_err(dev, state, async ? " async" : "", error); } + + dpm_async_resume_children(dev, async_resume); } static void async_resume(void *data, async_cookie_t cookie) @@ -1049,19 +1095,20 @@ void dpm_resume(pm_message_t state) mutex_lock(&dpm_list_mtx); /* - * Trigger the resume of "async" devices upfront so they don't have to - * wait for the "non-async" ones they don't depend on. + * Start processing "async" root devices upfront so they don't wait for + * the "sync" devices they don't depend on. */ list_for_each_entry(dev, &dpm_suspended_list, power.entry) { dpm_clear_async_state(dev); - dpm_async_fn(dev, async_resume); + if (dpm_root_device(dev)) + dpm_async_with_cleanup(dev, async_resume); } while (!list_empty(&dpm_suspended_list)) { dev = to_device(dpm_suspended_list.next); list_move_tail(&dev->power.entry, &dpm_prepared_list); - if (!dev->power.work_in_progress) { + if (!dpm_async_fn(dev, async_resume)) { get_device(dev); mutex_unlock(&dpm_list_mtx); -- 2.51.0 From aa7a9275ab814705b60ba8274277d91da6ab6122 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 Mar 2025 14:13:53 +0100 Subject: [PATCH 09/16] PM: sleep: Suspend async parents after suspending children In analogy with the previous change affecting the resume path, make device_suspend() start the async suspend of the device's parent after the device itself has been processed and make dpm_suspend() start processing "async" leaf devices (that is, devices without children) upfront so they don't need to wait for the "sync" devices they don't depend on. On the Dell XPS13 9360 in my office, this change reduces the total duration of device suspend by approximately 100 ms (over 20%). Suggested-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/3541233.QJadu78ljV@rjwysocki.net --- drivers/base/power/main.c | 67 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index aec774c02ffc..7a767c8488dd 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1236,6 +1236,41 @@ EXPORT_SYMBOL_GPL(dpm_resume_end); /*------------------------- Suspend routines -------------------------*/ +static bool dpm_leaf_device(struct device *dev) +{ + struct device *child; + + lockdep_assert_held(&dpm_list_mtx); + + child = device_find_any_child(dev); + if (child) { + put_device(child); + + return false; + } + + return true; +} + +static void dpm_async_suspend_parent(struct device *dev, async_func_t func) +{ + guard(mutex)(&dpm_list_mtx); + + /* + * If the device is suspended asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by checking + * if the device has been deleted already as the parent cannot be + * deleted before it. + */ + if (!device_pm_initialized(dev)) + return; + + /* Start processing the device's parent if it is "async". */ + if (dev->parent) + dpm_async_with_cleanup(dev->parent, func); +} + /** * resume_event - Return a "resume" message for given "suspend" sleep state. * @sleep_state: PM message representing a sleep state. @@ -1661,6 +1696,8 @@ static void dpm_clear_superiors_direct_complete(struct device *dev) device_links_read_unlock(idx); } +static void async_suspend(void *data, async_cookie_t cookie); + /** * device_suspend - Execute "suspend" callbacks for given device. * @dev: Device to handle. @@ -1790,7 +1827,13 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async) complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; + + if (error || async_error) + return error; + + dpm_async_suspend_parent(dev, async_suspend); + + return 0; } static void async_suspend(void *data, async_cookie_t cookie) @@ -1808,6 +1851,7 @@ static void async_suspend(void *data, async_cookie_t cookie) int dpm_suspend(pm_message_t state) { ktime_t starttime = ktime_get(); + struct device *dev; int error = 0; trace_suspend_resume(TPS("dpm_suspend"), state.event, true); @@ -1821,12 +1865,21 @@ int dpm_suspend(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_prepared_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend); + } + while (!list_empty(&dpm_prepared_list)) { - struct device *dev = to_device(dpm_prepared_list.prev); + dev = to_device(dpm_prepared_list.prev); list_move(&dev->power.entry, &dpm_suspended_list); - dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend)) continue; @@ -1840,8 +1893,14 @@ int dpm_suspend(pm_message_t state) mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (error || async_error) { + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice(&dpm_prepared_list, &dpm_suspended_list); break; + } } mutex_unlock(&dpm_list_mtx); -- 2.51.0 From 443046d1ad66607f324c604b9fbdf11266fa8aad Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 14 Mar 2025 14:14:30 +0100 Subject: [PATCH 10/16] PM: sleep: Make suspend of devices more asynchronous In analogy with previous changes, make device_suspend_late() and device_suspend_noirq() start the async suspend of the device's parent after the device itself has been processed and make dpm_suspend_late() and dpm_noirq_suspend_devices() start processing "async" leaf devices (that is, devices without children) upfront so they don't need to wait for the other devices they don't depend on. This change reduces the total duration of device suspend on some systems measurably, but not significantly. Suggested-by: Saravana Kannan Signed-off-by: Rafael J. Wysocki Link: https://patch.msgid.link/1924195.CQOukoFCf9@rjwysocki.net --- drivers/base/power/main.c | 64 ++++++++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 7a767c8488dd..bb0eb2e393f3 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1308,6 +1308,8 @@ static void dpm_superior_set_must_resume(struct device *dev) device_links_read_unlock(idx); } +static void async_suspend_noirq(void *data, async_cookie_t cookie); + /** * device_suspend_noirq - Execute a "noirq suspend" callback for given device. * @dev: Device to handle. @@ -1386,7 +1388,13 @@ Skip: Complete: complete_all(&dev->power.completion); TRACE_SUSPEND(error); - return error; + + if (error || async_error) + return error; + + dpm_async_suspend_parent(dev, async_suspend_noirq); + + return 0; } static void async_suspend_noirq(void *data, async_cookie_t cookie) @@ -1400,6 +1408,7 @@ static void async_suspend_noirq(void *data, async_cookie_t cookie) static int dpm_noirq_suspend_devices(pm_message_t state) { ktime_t starttime = ktime_get(); + struct device *dev; int error = 0; trace_suspend_resume(TPS("dpm_suspend_noirq"), state.event, true); @@ -1409,12 +1418,21 @@ static int dpm_noirq_suspend_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_late_early_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_noirq); + } + while (!list_empty(&dpm_late_early_list)) { - struct device *dev = to_device(dpm_late_early_list.prev); + dev = to_device(dpm_late_early_list.prev); list_move(&dev->power.entry, &dpm_noirq_list); - dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend_noirq)) continue; @@ -1428,8 +1446,14 @@ static int dpm_noirq_suspend_devices(pm_message_t state) mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (error || async_error) { + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice(&dpm_late_early_list, &dpm_noirq_list); break; + } } mutex_unlock(&dpm_list_mtx); @@ -1482,6 +1506,8 @@ static void dpm_propagate_wakeup_to_parent(struct device *dev) spin_unlock_irq(&parent->power.lock); } +static void async_suspend_late(void *data, async_cookie_t cookie); + /** * device_suspend_late - Execute a "late suspend" callback for given device. * @dev: Device to handle. @@ -1558,7 +1584,13 @@ Skip: Complete: TRACE_SUSPEND(error); complete_all(&dev->power.completion); - return error; + + if (error || async_error) + return error; + + dpm_async_suspend_parent(dev, async_suspend_late); + + return 0; } static void async_suspend_late(void *data, async_cookie_t cookie) @@ -1576,6 +1608,7 @@ static void async_suspend_late(void *data, async_cookie_t cookie) int dpm_suspend_late(pm_message_t state) { ktime_t starttime = ktime_get(); + struct device *dev; int error = 0; trace_suspend_resume(TPS("dpm_suspend_late"), state.event, true); @@ -1587,12 +1620,21 @@ int dpm_suspend_late(pm_message_t state) mutex_lock(&dpm_list_mtx); + /* + * Start processing "async" leaf devices upfront so they don't need to + * wait for the "sync" devices they don't depend on. + */ + list_for_each_entry_reverse(dev, &dpm_suspended_list, power.entry) { + dpm_clear_async_state(dev); + if (dpm_leaf_device(dev)) + dpm_async_with_cleanup(dev, async_suspend_late); + } + while (!list_empty(&dpm_suspended_list)) { - struct device *dev = to_device(dpm_suspended_list.prev); + dev = to_device(dpm_suspended_list.prev); list_move(&dev->power.entry, &dpm_late_early_list); - dpm_clear_async_state(dev); if (dpm_async_fn(dev, async_suspend_late)) continue; @@ -1606,8 +1648,14 @@ int dpm_suspend_late(pm_message_t state) mutex_lock(&dpm_list_mtx); - if (error || async_error) + if (error || async_error) { + /* + * Move all devices to the target list to resume them + * properly. + */ + list_splice(&dpm_suspended_list, &dpm_late_early_list); break; + } } mutex_unlock(&dpm_list_mtx); -- 2.51.0 From 036c94c9fd05824f86a92db115eba2773cc79231 Mon Sep 17 00:00:00 2001 From: Andrew Sayers Date: Fri, 11 Apr 2025 16:25:04 +0100 Subject: [PATCH 11/16] PM: sleep: Use two lines for "Restarting..." / "done" messages Other messages are occasionally printed between these two, for example: [203104.106534] Restarting tasks ... [203104.106559] mei_hdcp 0000:00:16.0-b638ab7e-94e2-4ea2-a552-d1c54b627f04: bound 0000:00:02.0 (ops i915_hdcp_ops [i915]) [203104.112354] done. This seems to be a timing issue, seen in two of the eleven hibernation exits in my current `dmesg` output. When printed on its own, the "done" message has the default log level. This makes the output of `dmesg --level=warn` quite misleading. Add enough context for the "done" messages to make sense on their own, and use the same log level for all messages. Change the messages to "..." / "Done .", unlike a449dfbfc089 which uses "..." / " completed.". Front-loading the unique part of the message makes it easier to scan the log, and reduces ambiguity for users who aren't confident in their English comprehension. Reviewed-by: Lucas De Marchi Signed-off-by: Andrew Sayers Link: https://patch.msgid.link/20250411152632.2806038-1-kernel.org@pileofstuff.org Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 8 ++++---- tools/power/pm-graph/sleepgraph.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index 66ac067d9ae6..4c674282df03 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -189,7 +189,7 @@ void thaw_processes(void) oom_killer_enable(); - pr_info("Restarting tasks ... "); + pr_info("Restarting tasks ...\n"); __usermodehelper_set_disable_depth(UMH_FREEZING); thaw_workqueues(); @@ -208,7 +208,7 @@ void thaw_processes(void) usermodehelper_enable(); schedule(); - pr_cont("done.\n"); + pr_info("Done restarting tasks.\n"); trace_suspend_resume(TPS("thaw_processes"), 0, false); } @@ -217,7 +217,7 @@ void thaw_kernel_threads(void) struct task_struct *g, *p; pm_nosig_freezing = false; - pr_info("Restarting kernel threads ... "); + pr_info("Restarting kernel threads ...\n"); thaw_workqueues(); @@ -229,5 +229,5 @@ void thaw_kernel_threads(void) read_unlock(&tasklist_lock); schedule(); - pr_cont("done.\n"); + pr_info("Done restarting kernel threads.\n"); } diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index e2261f33a082..1555b51a7d55 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -4017,7 +4017,8 @@ def parseKernelLog(data): 'PM: early restore of devices complete after.*'], 'resume_complete': ['PM: resume of devices complete after.*', 'PM: restore of devices complete after.*'], - 'post_resume': [r'.*Restarting tasks \.\.\..*'], + 'post_resume': [r'.*Restarting tasks \.\.\..*', + 'Done restarting tasks.*'], } # action table (expected events that occur and show up in dmesg) -- 2.51.0 From 9cd9d3fdd72b21b9ea47a8e1a08959d833b6e6ef Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Thu, 24 Apr 2025 14:03:39 +0800 Subject: [PATCH 12/16] PM: sleep: Remove unnecessary !! Since initcall_debug is a bool variable, it is not necessary to convert it to bool with the help of a double logical negation (!!). Remove the redundant operation. Signed-off-by: Zihuan Zhang Link: https://patch.msgid.link/20250424060339.73119-1-zhangzihuan@kylinos.cn [ rjw: Changelog rewrite ] Signed-off-by: Rafael J. Wysocki --- kernel/power/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/main.c b/kernel/power/main.c index 6254814d4817..97746f08b762 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -594,7 +594,7 @@ power_attr(pm_print_times); static inline void pm_print_times_init(void) { - pm_print_times_enabled = !!initcall_debug; + pm_print_times_enabled = initcall_debug; } static ssize_t pm_wakeup_irq_show(struct kobject *kobj, -- 2.51.0 From 228710e8db164011d9a4dd6944307a361e3185e2 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 5 May 2025 18:17:04 +0800 Subject: [PATCH 13/16] PM: wakeup: Add missing wakeup source attribute relax_count There is wakeup source attribute 'active_count', but its counterpart attribute 'relax_count' is missing. Add 'relax_count' for consistency. Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20250505-add_power_attrs-v1-1-10bc3c73c320@quicinc.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup_stats.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/base/power/wakeup_stats.c b/drivers/base/power/wakeup_stats.c index 6732ed2869f9..3ffd427248e8 100644 --- a/drivers/base/power/wakeup_stats.c +++ b/drivers/base/power/wakeup_stats.c @@ -34,6 +34,7 @@ wakeup_attr(active_count); wakeup_attr(event_count); wakeup_attr(wakeup_count); wakeup_attr(expire_count); +wakeup_attr(relax_count); static ssize_t active_time_ms_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -119,6 +120,7 @@ static struct attribute *wakeup_source_attrs[] = { &dev_attr_event_count.attr, &dev_attr_wakeup_count.attr, &dev_attr_expire_count.attr, + &dev_attr_relax_count.attr, &dev_attr_active_time_ms.attr, &dev_attr_total_time_ms.attr, &dev_attr_max_time_ms.attr, -- 2.51.0 From f0050a3e214aa941b78ad4caf122a735a24d81a6 Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Mon, 5 May 2025 17:26:51 +0800 Subject: [PATCH 14/16] PM: wakeup: Delete space in the end of string shown by pm_show_wakelocks() pm_show_wakelocks() is called to generate a string when showing attributes /sys/power/wake_(lock|unlock), but the string ends with an unwanted space that was added back by mistake by commit c9d967b2ce40 ("PM: wakeup: simplify the output logic of pm_show_wakelocks()"). Remove the unwanted space. Fixes: c9d967b2ce40 ("PM: wakeup: simplify the output logic of pm_show_wakelocks()") Signed-off-by: Zijun Hu Link: https://patch.msgid.link/20250505-fix_power-v1-1-0f7f2c2f338c@quicinc.com [ rjw: Changelog edits ] Signed-off-by: Rafael J. Wysocki --- kernel/power/wakelock.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 52571dcad768..4e941999a53b 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -49,6 +49,9 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) len += sysfs_emit_at(buf, len, "%s ", wl->name); } + if (len > 0) + --len; + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); -- 2.51.0 From 50c9bb30dc1f9731995a191deafbc49be717053e Mon Sep 17 00:00:00 2001 From: Zihuan Zhang Date: Wed, 7 May 2025 14:35:20 +0800 Subject: [PATCH 15/16] PM: hibernate: add configurable delay for pm_test Turn the default 5 second test delay for hibernation into a configurable module parameter, so users can determine how long to wait in this pseudo-hibernate state before resuming the system. The configurable delay parameter has been added for suspend, so add an analogous one for hibernation. Example (wait 30 seconds); # echo 30 > /sys/module/hibernate/parameters/pm_test_delay # echo core > /sys/power/pm_test Signed-off-by: Zihuan Zhang Reviewed-by: Randy Dunlap Link: https://patch.msgid.link/20250507063520.419635-1-zhangzihuan@kylinos.cn [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- Documentation/admin-guide/kernel-parameters.txt | 7 +++++++ kernel/power/hibernate.c | 9 +++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 76e538c77e31..ccd61e22403f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1831,6 +1831,13 @@ lz4: Select LZ4 compression algorithm to compress/decompress hibernation image. + hibernate.pm_test_delay= + [HIBERNATION] + Sets the number of seconds to remain in a hibernation test + mode before resuming the system (see + /sys/power/pm_test). Only available when CONFIG_PM_DEBUG + is set. Default value is 5. + highmem=nn[KMG] [KNL,BOOT,EARLY] forces the highmem zone to have an exact size of . This works even on boxes that have no highmem otherwise. This also works to reduce highmem diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f0db9d1896e8..f4db2e82fd87 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -133,10 +133,15 @@ bool system_entering_hibernation(void) EXPORT_SYMBOL(system_entering_hibernation); #ifdef CONFIG_PM_DEBUG +static unsigned int pm_test_delay = 5; +module_param(pm_test_delay, uint, 0644); +MODULE_PARM_DESC(pm_test_delay, + "Number of seconds to wait before resuming from hibernation test"); static void hibernation_debug_sleep(void) { - pr_info("debug: Waiting for 5 seconds.\n"); - mdelay(5000); + pr_info("hibernation debug: Waiting for %d second(s).\n", + pm_test_delay); + mdelay(pm_test_delay * 1000); } static int hibernation_test(int level) -- 2.51.0 From d559335f3f0cae88d9af49d6572297a30286286f Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Mon, 12 May 2025 15:26:53 +0200 Subject: [PATCH 16/16] ucsi_ccg: Disable async suspend in ucsi_ccg_probe() Commit aa7a9275ab81 ("PM: sleep: Suspend async parents after suspending children") had triggered a suspend issue on Tegra boards because it had reordered the syspend of devices with async suspend enabled with respect to some other devices. Specifically, the devices with async suspend enabled that have no children are now suspended before any other devices unless there are device links pointing to them as suppliers. The investigation that followed the failure report uncovered that async suspend was enabled for the cypd4226 device that was a Type-C controller with a dependency on USB PHY and it turned out that disabling async suspend for that device made the issue go away. Since async suspend takes dependencies between parents and children into account as well as other dependencies between devices represented by device links, this means that the cypd4226 has a dependency on another device that is not represented in any form in the kernel (a "hidden" dependency), in which case async suspend should not be enabled for it. Accordingly, make ucsi_ccg_probe() disable async suspend for the devices handled by, which covers the cypd4226 device on the Tegra boards as well as other devices likely to have similar "hidden" dependencies. Fixes: aa7a9275ab81 ("PM: sleep: Suspend async parents after suspending children") Closes: https://lore.kernel.org/linux-pm/c6cd714b-b0eb-42fc-b9b5-4f5f396fb4ec@nvidia.com/ Reported-by: Jon Hunter Tested-by: Jon Hunter Signed-off-by: Jon Hunter Signed-off-by: Rafael J. Wysocki Reviewed-by: Heikki Krogerus Link: https://patch.msgid.link/6180608.lOV4Wx5bFT@rjwysocki.net --- drivers/usb/typec/ucsi/ucsi_ccg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index f01e4ef6619d..e9a9df1431af 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -1483,6 +1483,8 @@ static int ucsi_ccg_probe(struct i2c_client *client) i2c_set_clientdata(client, uc); + device_disable_async_suspend(uc->dev); + pm_runtime_set_active(uc->dev); pm_runtime_enable(uc->dev); pm_runtime_use_autosuspend(uc->dev); -- 2.51.0