From 994633894f208a0151baaee1688ab3c431912553 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:53:18 -0400 Subject: [PATCH 01/16] tools/power turbostat: re-factor sysfs code Probe cpuidle "sysfs" residency and counts separately, since soon we will make one disabled on, and the other disabled off. Clarify that some BIC (build-in-counters) are actually "groups". since we're about to re-name some of those groups. no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c9a34c16c7a8..df0391bedcde 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -273,10 +273,10 @@ struct msr_counter bic[] = { #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -2354,16 +2354,16 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -10260,7 +10260,7 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; @@ -10304,6 +10304,16 @@ void probe_sysfs(void) if (state < min_state) min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int 
min_state = 1024, max_state = 0;
+	char *sp;

 	for (state = 10; state >= 0; --state) {

@@ -10602,7 +10612,8 @@ skip_cgroup_setting:
 		print_bootcmd();
 	}

-	probe_sysfs();
+	probe_cpuidle_residency();
+	probe_cpuidle_counts();

 	if (!getuid())
 		set_rlimit();
-- 2.51.0

From 6f110a5e4f9977c31ce76fefbfef6fd4eab6bfb7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Sun, 6 Apr 2025 10:00:04 -0700
Subject: [PATCH 02/16] Disable SLUB_TINY for build testing

... and don't error out so hard on missing module descriptions.

Before commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") we
used to warn about missing module descriptions, but only when building
with extra warnings (ie 'W=1').

After that commit the warning became an unconditional hard error. And it
turns out not all modules have been converted despite the claims to the
contrary. As reported by Damian Tometzki, the slub KUnit test didn't have
a module description, and apparently nobody ever really noticed.

The reason nobody noticed seems to be that the slub KUnit tests get
disabled by SLUB_TINY, which also ends up disabling a lot of other code,
both in tests and in slub itself. And so anybody doing full build tests
didn't actually see this failure.

So let's disable SLUB_TINY for build-only tests, since it clearly ends up
limiting build coverage. Also turn the missing module descriptions error
back into a warning, but let's keep it around for non-'W=1' builds.

Reported-by: Damian Tometzki
Link: https://lore.kernel.org/all/01070196099fd059-e8463438-7b1b-4ec8-816d-173874be9966-000000@eu-central-1.amazonses.com/
Cc: Masahiro Yamada
Cc: Jeff Johnson
Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()")
Signed-off-by: Linus Torvalds
---
 mm/Kconfig            | 2 +-
 scripts/mod/modpost.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index d3fb3762887b..e113f713b493 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -201,7 +201,7 @@ config KVFREE_RCU_BATCHED

 config SLUB_TINY
 	bool "Configure for minimal memory footprint"
-	depends on EXPERT
+	depends on EXPERT && !COMPILE_TEST
 	select SLAB_MERGE_DEFAULT
 	help
 	  Configures the slab allocator in a way to achieve minimal memory

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 92627e8d0e16..be89921d60b6 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1603,7 +1603,7 @@ static void read_symbols(const char *modname)
 		}

 		if (!get_modinfo(&info, "description"))
-			error("missing MODULE_DESCRIPTION() in %s\n", modname);
+			warn("missing MODULE_DESCRIPTION() in %s\n", modname);
 	}

 	for (sym = info.symtab_start; sym < info.symtab_stop; sym++) {
-- 2.51.0

From ec4acd3166d8a7a03b059d01b9c6f11a658e833f Mon Sep 17 00:00:00 2001
From: Len Brown
Date: Sun, 6 Apr 2025 14:29:57 -0400
Subject: [PATCH 03/16] tools/power turbostat: disable "cpuidle" invocation counters, by default

Create "pct_idle" counter group, the software notion of residency
so it can now be singled out, independent of other counter groups.

Create "cpuidle" group, the cpuidle invocation counts.
Disable "cpuidle", by default.

Create "swidle" = "cpuidle" + "pct_idle".
Undocument "sysfs", the old name for "swidle", but keep it working
for backwards compatibility.
Create "hwidle", all the HW idle counters Modify "idle", enabled by default "idle" = "hwidle" + "pct_idle" (and now excludes "cpuidle") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 12 +++++----- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e86493880c16..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,15 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. 
These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. 
.PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index df0391bedcde..ab184f95cdaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) +#define BIC_pct_idle (1ULL << 63) #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -2362,6 +2366,15 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { retval |= BIC_GROUP_IDLE; break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* 
legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; + break; } else if (!strcmp(name_list, "frequency")) { retval |= BIC_GROUP_FREQUENCY; break; @@ -2372,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10269,6 +10283,9 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10291,7 +10308,7 @@ void probe_cpuidle_residency(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) @@ -10315,6 +10332,9 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_cpuidle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10327,7 +10347,7 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) -- 2.51.0 From 03e00e373cab981ad808271b2650700cfa0fbda6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:49:20 -0400 Subject: [PATCH 04/16] tools/power turbostat: v2025.05.06 Support up to 8192 processors Add cpuidle governor debug telemetry, disabled by default Update default output to exclude cpuidle invocation counts Bug fixes Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab184f95cdaf..1b9fdc1a7ee8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9594,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From 0efdedb3358aa78102967f242379686f94315830 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 2 Apr 2025 21:21:57 +0100 Subject: [PATCH 05/16] tools/include: make uapi/linux/types.h usable from assembly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The "real" linux/types.h UAPI header gracefully degrades to a NOOP when included from assembly code. Mirror this behaviour in the tools/ variant. Test for __ASSEMBLER__ over __ASSEMBLY__ as the former is provided by the toolchain automatically. 
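For illustration only, a minimal sketch of the guard pattern being mirrored
(hypothetical typedef, not part of this patch). __ASSEMBLER__ is predefined
by the compiler whenever it preprocesses assembly, whereas __ASSEMBLY__ has
to be defined explicitly by the build system:

    #ifndef __ASSEMBLER__
    /* C-only declarations, skipped when a .S file pulls in this header */
    typedef unsigned int example_t;	/* hypothetical */
    #endif /* __ASSEMBLER__ */
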
Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/af553c62-ca2f-4956-932c-dd6e3a126f58@sirena.org.uk/ Fixes: c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20250321-uapi-consistency-v1-1-439070118dc0@linutronix.de Signed-off-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Linus Torvalds --- tools/include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ -- 2.51.0 From 0af2f6be1b4281385b618cb86ad946eded089ac8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 13:11:33 -0700 Subject: [PATCH 06/16] Linux 6.15-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e55726a71d95..38689a0c3605 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 0a02e1f4a54ace747304687ced3b76d159e58914 Mon Sep 17 00:00:00 2001 From: Yixun Lan Date: Wed, 26 Mar 2025 06:06:19 +0800 Subject: [PATCH 07/16] irqdomain: Support three-cell scheme interrupts Add new function *_twothreecell() to extend support to parse three-cell interrupts which encoded as , the translate function will retrieve irq number and flag from last two cells. This API will be used in gpio irq driver which need to work with two or three cells cases. 
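As an illustration (hypothetical driver, not part of this patch), a gpio
irqchip that accepts both encodings would plug the new helper straight into
its domain ops; the hwirq number and trigger type are taken from the last
two cells either way:

    #include <linux/irqdomain.h>

    static const struct irq_domain_ops foo_gpio_irq_domain_ops = {
    	.translate	= irq_domain_translate_twothreecell,
    	/* .alloc / .free as usual for the driver */
    };
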
Signed-off-by: Yixun Lan Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250326-04-gpio-irq-threecell-v3-1-aab006ab0e00@gentoo.org --- include/linux/irqdomain.h | 20 +++++++------- kernel/irq/irqdomain.c | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bb7111105296..df7e9278c8ac 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -571,16 +571,16 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, unsigned int *out_type); - -int irq_domain_translate_twocell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); - -int irq_domain_translate_onecell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); + +int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 9d5c8651492d..b294c3ff73b6 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1132,6 +1132,31 @@ int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, } EXPORT_SYMBOL_GPL(irq_domain_xlate_twocell); +/** + * irq_domain_xlate_twothreecell() - Generic xlate for direct two or three cell bindings + * @d: Interrupt domain involved in the translation + * @ctrlr: The device tree node for the device whose interrupt is translated + * @intspec: The interrupt specifier data from the device tree + * @intsize: The number of entries in @intspec + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type + * + * Device Tree interrupt specifier translation function for two or three + * cell bindings, where the cell values map directly to the hardware + * interrupt number and the type specifier. 
+ */ +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) +{ + struct irq_fwspec fwspec; + + of_phandle_args_to_fwspec(ctrlr, intspec, intsize, &fwspec); + + return irq_domain_translate_twothreecell(d, &fwspec, out_hwirq, out_type); +} +EXPORT_SYMBOL_GPL(irq_domain_xlate_twothreecell); + /** * irq_domain_xlate_onetwocell() - Generic xlate for one or two cell bindings * @d: Interrupt domain involved in the translation @@ -1216,6 +1241,37 @@ int irq_domain_translate_twocell(struct irq_domain *d, } EXPORT_SYMBOL_GPL(irq_domain_translate_twocell); +/** + * irq_domain_translate_twothreecell() - Generic translate for direct two or three cell + * bindings + * @d: Interrupt domain involved in the translation + * @fwspec: The firmware interrupt specifier to translate + * @out_hwirq: Pointer to storage for the hardware interrupt number + * @out_type: Pointer to storage for the interrupt type + * + * Firmware interrupt specifier translation function for two or three cell + * specifications, where the parameter values map directly to the hardware + * interrupt number and the type specifier. + */ +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (fwspec->param_count == 2) { + *out_hwirq = fwspec->param[0]; + *out_type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + return 0; + } + + if (fwspec->param_count == 3) { + *out_hwirq = fwspec->param[1]; + *out_type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(irq_domain_translate_twothreecell); + int irq_domain_alloc_descs(int virq, unsigned int cnt, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity) { -- 2.51.0 From 41c95ac4839401cb15e6c9a7756226f6af52ea49 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Apr 2025 13:16:51 +0300 Subject: [PATCH 08/16] genirq/irqdesc: Use sysfs_emit() to instead of s*printf() Follow the advice of the Documentation/filesystems/sysfs.rst that show() should only use sysfs_emit() or sysfs_emit_at() when formatting the value to be returned to user space. 
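For reference, a minimal sketch of the resulting pattern (hypothetical
attribute, not taken from this patch): sysfs_emit() checks that buf is
page aligned and clamps the output to PAGE_SIZE, and sysfs_emit_at() does
the same for appends at an offset, so the manual "PAGE_SIZE - ret"
arithmetic needed with scnprintf() goes away:

    static ssize_t example_show(struct kobject *kobj,
    			    struct kobj_attribute *attr, char *buf)
    {
    	return sysfs_emit(buf, "%d\n", 42);
    }
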
Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250416101651.2128688-1-andriy.shevchenko@linux.intel.com --- kernel/irq/irqdesc.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4258cd6bd3b4..4bcc6ff81e39 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -257,11 +257,11 @@ static ssize_t per_cpu_count_show(struct kobject *kobj, for_each_possible_cpu(cpu) { unsigned int c = irq_desc_kstat_cpu(desc, cpu); - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%u", p, c); + ret += sysfs_emit_at(buf, ret, "%s%u", p, c); p = ","; } - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += sysfs_emit_at(buf, ret, "\n"); return ret; } IRQ_ATTR_RO(per_cpu_count); @@ -273,10 +273,8 @@ static ssize_t chip_name_show(struct kobject *kobj, ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); - if (desc->irq_data.chip && desc->irq_data.chip->name) { - ret = scnprintf(buf, PAGE_SIZE, "%s\n", - desc->irq_data.chip->name); - } + if (desc->irq_data.chip && desc->irq_data.chip->name) + ret = sysfs_emit(buf, "%s\n", desc->irq_data.chip->name); raw_spin_unlock_irq(&desc->lock); return ret; @@ -291,7 +289,7 @@ static ssize_t hwirq_show(struct kobject *kobj, raw_spin_lock_irq(&desc->lock); if (desc->irq_data.domain) - ret = sprintf(buf, "%lu\n", desc->irq_data.hwirq); + ret = sysfs_emit(buf, "%lu\n", desc->irq_data.hwirq); raw_spin_unlock_irq(&desc->lock); return ret; @@ -305,8 +303,7 @@ static ssize_t type_show(struct kobject *kobj, ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); - ret = sprintf(buf, "%s\n", - irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); + ret = sysfs_emit(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); raw_spin_unlock_irq(&desc->lock); return ret; @@ -321,7 +318,7 @@ static ssize_t wakeup_show(struct kobject *kobj, ssize_t ret = 0; raw_spin_lock_irq(&desc->lock); - ret = sprintf(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); + ret = sysfs_emit(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); raw_spin_unlock_irq(&desc->lock); return ret; @@ -337,7 +334,7 @@ static ssize_t name_show(struct kobject *kobj, raw_spin_lock_irq(&desc->lock); if (desc->name) - ret = scnprintf(buf, PAGE_SIZE, "%s\n", desc->name); + ret = sysfs_emit(buf, "%s\n", desc->name); raw_spin_unlock_irq(&desc->lock); return ret; @@ -354,14 +351,13 @@ static ssize_t actions_show(struct kobject *kobj, raw_spin_lock_irq(&desc->lock); for_each_action_of_desc(desc, action) { - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s", - p, action->name); + ret += sysfs_emit_at(buf, ret, "%s%s", p, action->name); p = ","; } raw_spin_unlock_irq(&desc->lock); if (ret) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n"); + ret += sysfs_emit_at(buf, ret, "\n"); return ret; } -- 2.51.0 From 0128816c42b52c6ee339718621aeda85855cd3be Mon Sep 17 00:00:00 2001 From: Cheng-Yang Chou Date: Thu, 10 Apr 2025 18:51:43 +0800 Subject: [PATCH 09/16] genirq: Fix typo in IRQ_NOTCONNECTED comment Fix a minor typo in the comment for IRQ_NOTCONNECTED: "distingiush" is corrected to "distinguish". 
Signed-off-by: Cheng-Yang Chou Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250410105144.214849-1-yphbchou0911@gmail.com --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c782a74d2a30..51b6484c0493 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we - * can distingiush that case from other error returns. + * can distinguish that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. -- 2.51.0 From e5032ead8599affac5d8b816ea3c9d63ebeec6b4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 16 Apr 2025 14:40:33 +0300 Subject: [PATCH 10/16] genirq/irqdesc: Decrease indentation level in __irq_get_desc_lock() There is a conditional that covers all the code for the entire function. Invert it and decrease indentation level. This also helps for further changes to be clearer and tidier. [ tglx: Removed line breaks ] Signed-off-by: Andy Shevchenko Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250416114122.2191820-2-andriy.shevchenko@linux.intel.com --- kernel/irq/irqdesc.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4bcc6ff81e39..5b3aee255ec5 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -896,27 +896,27 @@ unsigned int irq_get_next_irq(unsigned int offset) return irq_find_at_or_after(offset); } -struct irq_desc * -__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, - unsigned int check) +struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check) { - struct irq_desc *desc = irq_to_desc(irq); + struct irq_desc *desc; - if (desc) { - if (check & _IRQ_DESC_CHECK) { - if ((check & _IRQ_DESC_PERCPU) && - !irq_settings_is_per_cpu_devid(desc)) - return NULL; - - if (!(check & _IRQ_DESC_PERCPU) && - irq_settings_is_per_cpu_devid(desc)) - return NULL; - } + desc = irq_to_desc(irq); + if (!desc) + return NULL; + + if (check & _IRQ_DESC_CHECK) { + if ((check & _IRQ_DESC_PERCPU) && !irq_settings_is_per_cpu_devid(desc)) + return NULL; - if (bus) - chip_bus_lock(desc); - raw_spin_lock_irqsave(&desc->lock, *flags); + if (!(check & _IRQ_DESC_PERCPU) && irq_settings_is_per_cpu_devid(desc)) + return NULL; } + + if (bus) + chip_bus_lock(desc); + raw_spin_lock_irqsave(&desc->lock, *flags); + return desc; } -- 2.51.0 From 0f70a49f3fa386d34203efd426a2937592cd26c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:54:49 +0200 Subject: [PATCH 11/16] genirq: Provide conditional lock guards The interrupt core code has an ever repeating pattern: unsigned long flags; struct irq_desc *desc = irq_get_desc_[bus]lock(irq, &flags, mode); if (!desc) return -EINVAL; .... irq_put_desc_[bus]unlock(desc, flags); That requires gotos in failure paths and just creates visual clutter. Provide lock guards, which allow to simplify the code. 
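For illustration, a hypothetical caller of the new guards (the following
patches convert the real ones): the descriptor is looked up and locked on
entry to the scope, the body is skipped entirely when no descriptor exists,
and the unlock (plus the bus unlock for the _buslock variant) happens
automatically when the scope is left:

    static int example_irq_op(unsigned int irq)
    {
    	scoped_irqdesc_get_and_lock(irq, IRQ_GET_DESC_CHECK_GLOBAL) {
    		struct irq_desc *desc = scoped_irqdesc;

    		/* ... operate on desc with desc->lock held ... */
    		return 0;
    	}
    	return -EINVAL;
    }
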
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065420.061659985@linutronix.de --- kernel/irq/internals.h | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index b0290849c395..44d3a67ec191 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -141,6 +141,10 @@ extern int irq_setup_affinity(struct irq_desc *desc); static inline int irq_setup_affinity(struct irq_desc *desc) { return 0; } #endif + +#define for_each_action_of_desc(desc, act) \ + for (act = desc->action; act; act = act->next) + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { @@ -160,14 +164,33 @@ static inline void chip_bus_sync_unlock(struct irq_desc *desc) #define IRQ_GET_DESC_CHECK_GLOBAL (_IRQ_DESC_CHECK) #define IRQ_GET_DESC_CHECK_PERCPU (_IRQ_DESC_CHECK | _IRQ_DESC_PERCPU) -#define for_each_action_of_desc(desc, act) \ - for (act = desc->action; act; act = act->next) - -struct irq_desc * -__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, - unsigned int check); +struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus, + unsigned int check); void __irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags, bool bus); +__DEFINE_CLASS_IS_CONDITIONAL(irqdesc_lock, true); +__DEFINE_UNLOCK_GUARD(irqdesc_lock, struct irq_desc, + __irq_put_desc_unlock(_T->lock, _T->flags, _T->bus), + unsigned long flags; bool bus); + +static inline class_irqdesc_lock_t class_irqdesc_lock_constructor(unsigned int irq, bool bus, + unsigned int check) +{ + class_irqdesc_lock_t _t = { + .bus = bus, + .lock = __irq_get_desc_lock(irq, &_t.flags, bus, check), + }; + return _t; +} + +#define scoped_irqdesc_get_and_lock(_irq, _check) \ + scoped_guard(irqdesc_lock, _irq, false, _check) + +#define scoped_irqdesc_get_and_buslock(_irq, _check) \ + scoped_guard(irqdesc_lock, _irq, true, _check) + +#define scoped_irqdesc ((struct irq_desc *)(__guard_ptr(irqdesc_lock)(&scope))) + static inline struct irq_desc * irq_get_desc_buslock(unsigned int irq, unsigned long *flags, unsigned int check) { -- 2.51.0 From 5d964a9f7cd8f669db588d9d0db61b4f81af4978 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 30 Apr 2025 08:36:39 +0200 Subject: [PATCH 12/16] genirq/irqdesc: Switch to lock guards Replace all lock/unlock pairs with lock guards and simplify the code flow. No functional change. 
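The conversions rely on the two guard flavors from <linux/cleanup.h>; as a
rough sketch with hypothetical locks, guard() keeps the lock held for the
remainder of the enclosing scope, while scoped_guard() confines it to the
attached block or statement:

    static DEFINE_MUTEX(example_mutex);
    static DEFINE_RAW_SPINLOCK(example_lock);

    static void example(void)
    {
    	guard(mutex)(&example_mutex);	/* dropped on any return path */

    	scoped_guard(raw_spinlock_irq, &example_lock) {
    		/* example_lock held only inside this block */
    	}
    }
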
Signed-off-by: Thomas Gleixner Reviewed-by: Jiri Slaby Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/871ptaqhoo.ffs@tglx --- kernel/irq/irqdesc.c | 129 +++++++++++++++---------------------------- 1 file changed, 44 insertions(+), 85 deletions(-) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 5b3aee255ec5..6d006a68ff85 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -246,8 +246,7 @@ static struct kobject *irq_kobj_base; #define IRQ_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) -static ssize_t per_cpu_count_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t per_cpu_count_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); ssize_t ret = 0; @@ -266,99 +265,75 @@ static ssize_t per_cpu_count_show(struct kobject *kobj, } IRQ_ATTR_RO(per_cpu_count); -static ssize_t chip_name_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t chip_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.chip && desc->irq_data.chip->name) - ret = sysfs_emit(buf, "%s\n", desc->irq_data.chip->name); - raw_spin_unlock_irq(&desc->lock); - - return ret; + return sysfs_emit(buf, "%s\n", desc->irq_data.chip->name); + return 0; } IRQ_ATTR_RO(chip_name); -static ssize_t hwirq_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t hwirq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; + guard(raw_spinlock_irq)(&desc->lock); raw_spin_lock_irq(&desc->lock); if (desc->irq_data.domain) - ret = sysfs_emit(buf, "%lu\n", desc->irq_data.hwirq); - raw_spin_unlock_irq(&desc->lock); - - return ret; + return sysfs_emit(buf, "%lu\n", desc->irq_data.hwirq); + return 0; } IRQ_ATTR_RO(hwirq); -static ssize_t type_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - - raw_spin_lock_irq(&desc->lock); - ret = sysfs_emit(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? "level" : "edge"); - raw_spin_unlock_irq(&desc->lock); - return ret; + guard(raw_spinlock_irq)(&desc->lock); + return sysfs_emit(buf, "%s\n", irqd_is_level_type(&desc->irq_data) ? 
"level" : "edge"); } IRQ_ATTR_RO(type); -static ssize_t wakeup_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t wakeup_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - - raw_spin_lock_irq(&desc->lock); - ret = sysfs_emit(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); - raw_spin_unlock_irq(&desc->lock); - - return ret; + guard(raw_spinlock_irq)(&desc->lock); + return sysfs_emit(buf, "%s\n", str_enabled_disabled(irqd_is_wakeup_set(&desc->irq_data))); } IRQ_ATTR_RO(wakeup); -static ssize_t name_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); - ssize_t ret = 0; - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->name) - ret = sysfs_emit(buf, "%s\n", desc->name); - raw_spin_unlock_irq(&desc->lock); - - return ret; + return sysfs_emit(buf, "%s\n", desc->name); + return 0; } IRQ_ATTR_RO(name); -static ssize_t actions_show(struct kobject *kobj, - struct kobj_attribute *attr, char *buf) +static ssize_t actions_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct irq_desc *desc = container_of(kobj, struct irq_desc, kobj); struct irqaction *action; ssize_t ret = 0; char *p = ""; - raw_spin_lock_irq(&desc->lock); - for_each_action_of_desc(desc, action) { - ret += sysfs_emit_at(buf, ret, "%s%s", p, action->name); - p = ","; + scoped_guard(raw_spinlock_irq, &desc->lock) { + for_each_action_of_desc(desc, action) { + ret += sysfs_emit_at(buf, ret, "%s%s", p, action->name); + p = ","; + } } - raw_spin_unlock_irq(&desc->lock); if (ret) ret += sysfs_emit_at(buf, ret, "\n"); - return ret; } IRQ_ATTR_RO(actions); @@ -414,19 +389,14 @@ static int __init irq_sysfs_init(void) int irq; /* Prevent concurrent irq alloc/free */ - irq_lock_sparse(); - + guard(mutex)(&sparse_irq_lock); irq_kobj_base = kobject_create_and_add("irq", kernel_kobj); - if (!irq_kobj_base) { - irq_unlock_sparse(); + if (!irq_kobj_base) return -ENOMEM; - } /* Add the already allocated interrupts */ for_each_irq_desc(irq, desc) irq_sysfs_add(irq, desc); - irq_unlock_sparse(); - return 0; } postcore_initcall(irq_sysfs_init); @@ -569,12 +539,12 @@ err: return -ENOMEM; } -static int irq_expand_nr_irqs(unsigned int nr) +static bool irq_expand_nr_irqs(unsigned int nr) { if (nr > MAX_SPARSE_IRQS) - return -ENOMEM; + return false; nr_irqs = nr; - return 0; + return true; } int __init early_irq_init(void) @@ -652,11 +622,9 @@ EXPORT_SYMBOL(irq_to_desc); static void free_desc(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + desc_set_defaults(irq, desc, irq_desc_get_node(desc), NULL, NULL); delete_irq_desc(irq); } @@ -675,16 +643,15 @@ static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, return start; } -static int irq_expand_nr_irqs(unsigned int nr) +static inline bool irq_expand_nr_irqs(unsigned int nr) { - return -ENOMEM; + return false; } void irq_mark_irq(unsigned int irq) { - mutex_lock(&sparse_irq_lock); + guard(mutex)(&sparse_irq_lock); irq_insert_desc(irq, irq_desc + irq); - 
mutex_unlock(&sparse_irq_lock); } #ifdef CONFIG_GENERIC_IRQ_LEGACY @@ -823,11 +790,9 @@ void irq_free_descs(unsigned int from, unsigned int cnt) if (from >= nr_irqs || (from + cnt) > nr_irqs) return; - mutex_lock(&sparse_irq_lock); + guard(mutex)(&sparse_irq_lock); for (i = 0; i < cnt; i++) free_desc(from + i); - - mutex_unlock(&sparse_irq_lock); } EXPORT_SYMBOL_GPL(irq_free_descs); @@ -844,11 +809,10 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * * Returns the first irq number or error code */ -int __ref -__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, - struct module *owner, const struct irq_affinity_desc *affinity) +int __ref __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner, const struct irq_affinity_desc *affinity) { - int start, ret; + int start; if (!cnt) return -EINVAL; @@ -866,22 +830,17 @@ __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, from = arch_dynirq_lower_bound(from); } - mutex_lock(&sparse_irq_lock); + guard(mutex)(&sparse_irq_lock); start = irq_find_free_area(from, cnt); - ret = -EEXIST; if (irq >=0 && start != irq) - goto unlock; + return -EEXIST; if (start + cnt > nr_irqs) { - ret = irq_expand_nr_irqs(start + cnt); - if (ret) - goto unlock; + if (!irq_expand_nr_irqs(start + cnt)) + return -ENOMEM; } - ret = alloc_descs(start, cnt, node, affinity, owner); -unlock: - mutex_unlock(&sparse_irq_lock); - return ret; + return alloc_descs(start, cnt, node, affinity, owner); } EXPORT_SYMBOL_GPL(__irq_alloc_descs); -- 2.51.0 From e80618b27a008839e3b61c1efa0b915b155f2a8d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:54:52 +0200 Subject: [PATCH 13/16] genirq/autoprobe: Switch to lock guards Convert all lock/unlock pairs to guards. No functional change. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065420.188866381@linutronix.de --- kernel/irq/autoprobe.c | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index ae60cae24e9a..d0af8a8b3ae6 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -43,18 +43,16 @@ unsigned long probe_irq_on(void) * flush such a longstanding irq before considering it as spurious. */ for_each_irq_desc_reverse(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { /* * Some chips need to know about probing in * progress: */ if (desc->irq_data.chip->irq_set_type) - desc->irq_data.chip->irq_set_type(&desc->irq_data, - IRQ_TYPE_PROBE); + desc->irq_data.chip->irq_set_type(&desc->irq_data, IRQ_TYPE_PROBE); irq_activate_and_startup(desc, IRQ_NORESEND); } - raw_spin_unlock_irq(&desc->lock); } /* Wait for longstanding interrupts to trigger. 
*/ @@ -66,13 +64,12 @@ unsigned long probe_irq_on(void) * happened in the previous stage, it may have masked itself) */ for_each_irq_desc_reverse(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (!desc->action && irq_settings_can_probe(desc)) { desc->istate |= IRQS_AUTODETECT | IRQS_WAITING; if (irq_activate_and_startup(desc, IRQ_NORESEND)) desc->istate |= IRQS_PENDING; } - raw_spin_unlock_irq(&desc->lock); } /* @@ -84,18 +81,16 @@ unsigned long probe_irq_on(void) * Now filter out any obviously spurious interrupts */ for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); - + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { /* It triggered already - consider it spurious. */ if (!(desc->istate & IRQS_WAITING)) { desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); - } else - if (i < 32) - mask |= 1 << i; + } else if (i < 32) { + mask |= 1 << i; + } } - raw_spin_unlock_irq(&desc->lock); } return mask; @@ -121,7 +116,7 @@ unsigned int probe_irq_mask(unsigned long val) int i; for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { if (i < 16 && !(desc->istate & IRQS_WAITING)) mask |= 1 << i; @@ -129,7 +124,6 @@ unsigned int probe_irq_mask(unsigned long val) desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); } - raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); @@ -160,8 +154,7 @@ int probe_irq_off(unsigned long val) struct irq_desc *desc; for_each_irq_desc(i, desc) { - raw_spin_lock_irq(&desc->lock); - + guard(raw_spinlock_irq)(&desc->lock); if (desc->istate & IRQS_AUTODETECT) { if (!(desc->istate & IRQS_WAITING)) { if (!nr_of_irqs) @@ -171,7 +164,6 @@ int probe_irq_off(unsigned long val) desc->istate &= ~IRQS_AUTODETECT; irq_shutdown_and_deactivate(desc); } - raw_spin_unlock_irq(&desc->lock); } mutex_unlock(&probing_active); -- 2.51.0 From 19b4b14428338775d8c0d0e725ecfb14e10121c3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:54:53 +0200 Subject: [PATCH 14/16] genirq/pm: Switch to lock guards Convert all lock/unlock pairs to guards and tidy up the code. No functional change. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065420.251299112@linutronix.de --- kernel/irq/pm.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index c556bc49d213..445912d51033 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -46,8 +46,7 @@ void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) desc->cond_suspend_depth++; WARN_ON_ONCE(desc->no_suspend_depth && - (desc->no_suspend_depth + - desc->cond_suspend_depth) != desc->nr_actions); + (desc->no_suspend_depth + desc->cond_suspend_depth) != desc->nr_actions); } /* @@ -134,14 +133,12 @@ void suspend_device_irqs(void) int irq; for_each_irq_desc(irq, desc) { - unsigned long flags; bool sync; if (irq_settings_is_nested_thread(desc)) continue; - raw_spin_lock_irqsave(&desc->lock, flags); - sync = suspend_device_irq(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irqsave, &desc->lock) + sync = suspend_device_irq(desc); if (sync) synchronize_irq(irq); @@ -186,18 +183,15 @@ static void resume_irqs(bool want_early) int irq; for_each_irq_desc(irq, desc) { - unsigned long flags; - bool is_early = desc->action && - desc->action->flags & IRQF_EARLY_RESUME; + bool is_early = desc->action && desc->action->flags & IRQF_EARLY_RESUME; if (!is_early && want_early) continue; if (irq_settings_is_nested_thread(desc)) continue; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irqsave)(&desc->lock); resume_irq(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); } } @@ -207,22 +201,16 @@ static void resume_irqs(bool want_early) */ void rearm_wake_irq(unsigned int irq) { - unsigned long flags; - struct irq_desc *desc = irq_get_desc_buslock(irq, &flags, IRQ_GET_DESC_CHECK_GLOBAL); + scoped_irqdesc_get_and_buslock(irq, IRQ_GET_DESC_CHECK_GLOBAL) { + struct irq_desc *desc = scoped_irqdesc; - if (!desc) - return; - - if (!(desc->istate & IRQS_SUSPENDED) || - !irqd_is_wakeup_set(&desc->irq_data)) - goto unlock; - - desc->istate &= ~IRQS_SUSPENDED; - irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); - __enable_irq(desc); + if (!(desc->istate & IRQS_SUSPENDED) || !irqd_is_wakeup_set(&desc->irq_data)) + return; -unlock: - irq_put_desc_busunlock(desc, flags); + desc->istate &= ~IRQS_SUSPENDED; + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + __enable_irq(desc); + } } /** -- 2.51.0 From 4bcdf07467fab54a5dfbb0fb8546b5e59c87c497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:54:55 +0200 Subject: [PATCH 15/16] genirq/resend: Switch to lock guards Convert all lock/unlock pairs to guards and tidy up the code. No functional change. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065420.312487167@linutronix.de --- kernel/irq/resend.c | 50 +++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 29 deletions(-) diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c index 1b7fa72968bd..ca9cc1b806a9 100644 --- a/kernel/irq/resend.c +++ b/kernel/irq/resend.c @@ -30,18 +30,17 @@ static DEFINE_RAW_SPINLOCK(irq_resend_lock); */ static void resend_irqs(struct tasklet_struct *unused) { - struct irq_desc *desc; - - raw_spin_lock_irq(&irq_resend_lock); + guard(raw_spinlock_irq)(&irq_resend_lock); while (!hlist_empty(&irq_resend_list)) { - desc = hlist_entry(irq_resend_list.first, struct irq_desc, - resend_node); + struct irq_desc *desc; + + desc = hlist_entry(irq_resend_list.first, struct irq_desc, resend_node); hlist_del_init(&desc->resend_node); + raw_spin_unlock(&irq_resend_lock); desc->handle_irq(desc); raw_spin_lock(&irq_resend_lock); } - raw_spin_unlock_irq(&irq_resend_lock); } /* Tasklet to handle resend: */ @@ -75,19 +74,18 @@ static int irq_sw_resend(struct irq_desc *desc) } /* Add to resend_list and activate the softirq: */ - raw_spin_lock(&irq_resend_lock); - if (hlist_unhashed(&desc->resend_node)) - hlist_add_head(&desc->resend_node, &irq_resend_list); - raw_spin_unlock(&irq_resend_lock); + scoped_guard(raw_spinlock, &irq_resend_lock) { + if (hlist_unhashed(&desc->resend_node)) + hlist_add_head(&desc->resend_node, &irq_resend_list); + } tasklet_schedule(&resend_tasklet); return 0; } void clear_irq_resend(struct irq_desc *desc) { - raw_spin_lock(&irq_resend_lock); + guard(raw_spinlock)(&irq_resend_lock); hlist_del_init(&desc->resend_node); - raw_spin_unlock(&irq_resend_lock); } void irq_resend_init(struct irq_desc *desc) @@ -172,30 +170,24 @@ int check_irq_resend(struct irq_desc *desc, bool inject) */ int irq_inject_interrupt(unsigned int irq) { - struct irq_desc *desc; - unsigned long flags; - int err; + int err = -EINVAL; /* Try the state injection hardware interface first */ if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true)) return 0; /* That failed, try via the resend mechanism */ - desc = irq_get_desc_buslock(irq, &flags, 0); - if (!desc) - return -EINVAL; + scoped_irqdesc_get_and_buslock(irq, 0) { + struct irq_desc *desc = scoped_irqdesc; - /* - * Only try to inject when the interrupt is: - * - not NMI type - * - activated - */ - if (irq_is_nmi(desc) || !irqd_is_activated(&desc->irq_data)) - err = -EINVAL; - else - err = check_irq_resend(desc, true); - - irq_put_desc_busunlock(desc, flags); + /* + * Only try to inject when the interrupt is: + * - not NMI type + * - activated + */ + if (!irq_is_nmi(desc) && irqd_is_activated(&desc->irq_data)) + err = check_irq_resend(desc, true); + } return err; } EXPORT_SYMBOL_GPL(irq_inject_interrupt); -- 2.51.0 From 659ff9c9d77b8ad9d9c18e264abc9a56bd19230e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 29 Apr 2025 08:54:56 +0200 Subject: [PATCH 16/16] genirq/proc: Switch to lock guards Convert all lock/unlock pairs to guards and tidy up the code. No functional change. 
Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250429065420.373998838@linutronix.de --- kernel/irq/proc.c | 65 +++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 8e29809de38d..94eba9a425c4 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -81,20 +81,18 @@ static int show_irq_affinity(int type, struct seq_file *m) static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) { struct irq_desc *desc = irq_to_desc((long)m->private); - unsigned long flags; cpumask_var_t mask; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; - raw_spin_lock_irqsave(&desc->lock, flags); - if (desc->affinity_hint) - cpumask_copy(mask, desc->affinity_hint); - raw_spin_unlock_irqrestore(&desc->lock, flags); + scoped_guard(raw_spinlock_irq, &desc->lock) { + if (desc->affinity_hint) + cpumask_copy(mask, desc->affinity_hint); + } seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); - return 0; } @@ -295,23 +293,18 @@ static int irq_spurious_proc_show(struct seq_file *m, void *v) #define MAX_NAMELEN 128 -static int name_unique(unsigned int irq, struct irqaction *new_action) +static bool name_unique(unsigned int irq, struct irqaction *new_action) { struct irq_desc *desc = irq_to_desc(irq); struct irqaction *action; - unsigned long flags; - int ret = 1; - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irq)(&desc->lock); for_each_action_of_desc(desc, action) { if ((action != new_action) && action->name && - !strcmp(new_action->name, action->name)) { - ret = 0; - break; - } + !strcmp(new_action->name, action->name)) + return false; } - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; + return true; } void register_handler_proc(unsigned int irq, struct irqaction *action) @@ -319,8 +312,7 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) char name [MAX_NAMELEN]; struct irq_desc *desc = irq_to_desc(irq); - if (!desc->dir || action->dir || !action->name || - !name_unique(irq, action)) + if (!desc->dir || action->dir || !action->name || !name_unique(irq, action)) return; snprintf(name, MAX_NAMELEN, "%s", action->name); @@ -347,17 +339,16 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) * added, not when the descriptor is created, so multiple * tasks might try to register at the same time. 
*/ - mutex_lock(®ister_lock); + guard(mutex)(®ister_lock); if (desc->dir) - goto out_unlock; - - sprintf(name, "%d", irq); + return; /* create /proc/irq/1234 */ + sprintf(name, "%d", irq); desc->dir = proc_mkdir(name, root_irq_dir); if (!desc->dir) - goto out_unlock; + return; #ifdef CONFIG_SMP umode_t umode = S_IRUGO; @@ -366,31 +357,27 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) umode |= S_IWUSR; /* create /proc/irq//smp_affinity */ - proc_create_data("smp_affinity", umode, desc->dir, - &irq_affinity_proc_ops, irqp); + proc_create_data("smp_affinity", umode, desc->dir, &irq_affinity_proc_ops, irqp); /* create /proc/irq//affinity_hint */ proc_create_single_data("affinity_hint", 0444, desc->dir, - irq_affinity_hint_proc_show, irqp); + irq_affinity_hint_proc_show, irqp); /* create /proc/irq//smp_affinity_list */ proc_create_data("smp_affinity_list", umode, desc->dir, &irq_affinity_list_proc_ops, irqp); - proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, - irqp); + proc_create_single_data("node", 0444, desc->dir, irq_node_proc_show, irqp); # ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK proc_create_single_data("effective_affinity", 0444, desc->dir, - irq_effective_aff_proc_show, irqp); + irq_effective_aff_proc_show, irqp); proc_create_single_data("effective_affinity_list", 0444, desc->dir, - irq_effective_aff_list_proc_show, irqp); + irq_effective_aff_list_proc_show, irqp); # endif #endif proc_create_single_data("spurious", 0444, desc->dir, - irq_spurious_proc_show, (void *)(long)irq); + irq_spurious_proc_show, (void *)(long)irq); -out_unlock: - mutex_unlock(®ister_lock); } void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) @@ -468,7 +455,6 @@ int show_interrupts(struct seq_file *p, void *v) int i = *(loff_t *) v, j; struct irqaction *action; struct irq_desc *desc; - unsigned long flags; if (i > ACTUAL_NR_IRQS) return 0; @@ -487,13 +473,13 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); } - rcu_read_lock(); + guard(rcu)(); desc = irq_to_desc(i); if (!desc || irq_settings_is_hidden(desc)) - goto outsparse; + return 0; if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs) - goto outsparse; + return 0; seq_printf(p, "%*d:", prec, i); for_each_online_cpu(j) { @@ -503,7 +489,7 @@ int show_interrupts(struct seq_file *p, void *v) } seq_putc(p, ' '); - raw_spin_lock_irqsave(&desc->lock, flags); + guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); @@ -532,9 +518,6 @@ int show_interrupts(struct seq_file *p, void *v) } seq_putc(p, '\n'); - raw_spin_unlock_irqrestore(&desc->lock, flags); -outsparse: - rcu_read_unlock(); return 0; } #endif -- 2.51.0