From 3ae8508663372b93c5556a887e96ed0ca5df0711 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:23:22 -0400 Subject: [PATCH 01/16] tools/power turbostat: Document GNR UncMHz domain convention Document that on Intel Granite Rapids Systems, Uncore domains 0-2 are CPU domains, and uncore domains 3-4 are IO domains. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 144565151e1e..e86493880c16 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -205,6 +205,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics \fBUncMHz\fP per-package uncore MHz, instantaneous sample. .PP \fBUMHz1.0\fP per-package uncore MHz for domain=1 and fabric_cluster=0, instantaneous sample. System summary is the average of all packages. +Intel Granite Rapids systems use domains 0-2 for CPUs, and 3-4 for IO, with cluster always 0. For the "--show" and "--hide" options, use "UncMHz" to operate on all UMHz*.* as a group. .SH TOO MUCH INFORMATION EXAMPLE By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. -- 2.51.0 From f8b136ef2605c1bf62020462d10e35228760aa19 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 19 Mar 2025 08:53:07 +0800 Subject: [PATCH 02/16] tools/power turbostat: Restore GFX sysfs fflush() call Do fflush() to discard the buffered data, before each read of the graphics sysfs knobs. Fixes: ba99a4fc8c24 ("tools/power turbostat: Remove unnecessary fflush() call") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 70e17d4ad9b6..c9a34c16c7a8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6039,6 +6039,7 @@ int snapshot_graphics(int idx) int retval; rewind(gfx_info[idx].fp); + fflush(gfx_info[idx].fp); switch (idx) { case GFX_rc6: -- 2.51.0 From 994633894f208a0151baaee1688ab3c431912553 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 12:53:18 -0400 Subject: [PATCH 03/16] tools/power turbostat: re-factor sysfs code Probe cpuidle "sysfs" residency and counts separately, since soon we will make one disabled on, and the other disabled off. Clarify that some BIC (build-in-counters) are actually "groups". since we're about to re-name some of those groups. no functional change. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 31 ++++++++++++++++++--------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c9a34c16c7a8..df0391bedcde 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -273,10 +273,10 @@ struct msr_counter bic[] = { #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) -#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) -#define BIC_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) -#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) +#define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) +#define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) +#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -2354,16 +2354,16 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) retval |= ~0; break; } else if (!strcmp(name_list, "topology")) { - retval |= BIC_TOPOLOGY; + retval |= BIC_GROUP_TOPOLOGY; break; } else if (!strcmp(name_list, "power")) { - retval |= BIC_THERMAL_PWR; + retval |= BIC_GROUP_THERMAL_PWR; break; } else if (!strcmp(name_list, "idle")) { - retval |= BIC_IDLE; + retval |= BIC_GROUP_IDLE; break; } else if (!strcmp(name_list, "frequency")) { - retval |= BIC_FREQUENCY; + retval |= BIC_GROUP_FREQUENCY; break; } else if (!strcmp(name_list, "other")) { retval |= BIC_OTHER; @@ -10260,7 +10260,7 @@ int is_deferred_skip(char *name) return 0; } -void probe_sysfs(void) +void probe_cpuidle_residency(void) { char path[64]; char name_buf[16]; @@ -10304,6 +10304,16 @@ void probe_sysfs(void) if (state < min_state) min_state = state; } +} + +void probe_cpuidle_counts(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + int min_state = 1024, max_state = 0; + char *sp; for (state = 10; state >= 0; --state) { @@ -10602,7 +10612,8 @@ skip_cgroup_setting: print_bootcmd(); } - probe_sysfs(); + probe_cpuidle_residency(); + probe_cpuidle_counts(); if (!getuid()) set_rlimit(); -- 2.51.0 From 6f110a5e4f9977c31ce76fefbfef6fd4eab6bfb7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 10:00:04 -0700 Subject: [PATCH 04/16] Disable SLUB_TINY for build testing ... and don't error out so hard on missing module descriptions. Before commit 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") we used to warn about missing module descriptions, but only when building with extra warnigns (ie 'W=1'). After that commit the warning became an unconditional hard error. And it turns out not all modules have been converted despite the claims to the contrary. As reported by Damian Tometzki, the slub KUnit test didn't have a module description, and apparently nobody ever really noticed. The reason nobody noticed seems to be that the slub KUnit tests get disabled by SLUB_TINY, which also ends up disabling a lot of other code, both in tests and in slub itself. And so anybody doing full build tests didn't actually see this failre. So let's disable SLUB_TINY for build-only tests, since it clearly ends up limiting build coverage. Also turn the missing module descriptions error back into a warning, but let's keep it around for non-'W=1' builds. Reported-by: Damian Tometzki Link: https://lore.kernel.org/all/01070196099fd059-e8463438-7b1b-4ec8-816d-173874be9966-000000@eu-central-1.amazonses.com/ Cc: Masahiro Yamada Cc: Jeff Johnson Fixes: 6c6c1fc09de3 ("modpost: require a MODULE_DESCRIPTION()") Signed-off-by: Linus Torvalds --- mm/Kconfig | 2 +- scripts/mod/modpost.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index d3fb3762887b..e113f713b493 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -201,7 +201,7 @@ config KVFREE_RCU_BATCHED config SLUB_TINY bool "Configure for minimal memory footprint" - depends on EXPERT + depends on EXPERT && !COMPILE_TEST select SLAB_MERGE_DEFAULT help Configures the slab allocator in a way to achieve minimal memory diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 92627e8d0e16..be89921d60b6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1603,7 +1603,7 @@ static void read_symbols(const char *modname) } if (!get_modinfo(&info, "description")) - error("missing MODULE_DESCRIPTION() in %s\n", modname); + warn("missing MODULE_DESCRIPTION() in %s\n", modname); } for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { -- 2.51.0 From ec4acd3166d8a7a03b059d01b9c6f11a658e833f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:29:57 -0400 Subject: [PATCH 05/16] tools/power turbostat: disable "cpuidle" invocation counters, by default Create "pct_idle" counter group, the sofware notion of residency so it can now be singled out, independent of other counter groups. Create "cpuidle" group, the cpuidle invocation counts. Disable "cpuidle", by default. Create "swidle" = "cpuidle" + "pct_idle". Undocument "sysfs", the old name for "swidle", but keep it working for backwards compatibilty. Create "hwidle", all the HW idle counters Modify "idle", enabled by default "idle" = "hwidle" + "pct_idle" (and now excludes "cpuidle") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 12 +++++----- tools/power/x86/turbostat/turbostat.c | 34 +++++++++++++++++++++------ 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e86493880c16..b74ed916057e 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -100,7 +100,7 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP -\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "cpuidle", "hwidle", "swidle", "other". "idle" (enabled by default), includes "hwidle" and "idle_pct". "cpuidle" (default disabled) includes cpuidle software invocation counters. "swidle" includes "cpuidle" plus "idle_pct". "hwidle" includes only hardware based idle residency counters. Older versions of turbostat used the term "sysfs" for what is now "swidle". .PP \fB--Dump\fP displays the raw counter values. .PP @@ -158,15 +158,15 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. .PP -\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. +\fBC1+, C2+, C3+...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a deeper idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/below file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. +\fBC1-, C2-, C3-...\fP The idle governor idle state misprediction statistics. Inidcates the number times Linux requested the C1, C2, C3 idle state during the measurement interval, but should have requested a shallower idle state (if it exists and enabled). These statistics come from the /sys/devices/system/cpu/cpu*/cpuidle/state*/above file. These counters are in the "cpuidle" group, which is disabled, by default. .PP -\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. These counters are in the "pct_idle" group, which is enabled by default. .PP -\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters and are in the "hwidle" group, which is enabled, by default. .PP \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index df0391bedcde..ab184f95cdaf 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -153,7 +153,7 @@ struct msr_counter bic[] = { { 0x0, "TSC_MHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "IRQ", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SMI", NULL, 32, 0, FORMAT_DELTA, NULL, 0 }, - { 0x0, "sysfs", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "cpuidle", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c3", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c6", NULL, 0, 0, 0, NULL, 0 }, @@ -206,6 +206,7 @@ struct msr_counter bic[] = { { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "NMI", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "CPU%c1e", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "pct_idle", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -219,7 +220,7 @@ struct msr_counter bic[] = { #define BIC_TSC_MHz (1ULL << 7) #define BIC_IRQ (1ULL << 8) #define BIC_SMI (1ULL << 9) -#define BIC_sysfs (1ULL << 10) +#define BIC_cpuidle (1ULL << 10) #define BIC_CPU_c1 (1ULL << 11) #define BIC_CPU_c3 (1ULL << 12) #define BIC_CPU_c6 (1ULL << 13) @@ -272,17 +273,20 @@ struct msr_counter bic[] = { #define BIC_Sys_J (1ULL << 60) #define BIC_NMI (1ULL << 61) #define BIC_CPU_c1e (1ULL << 62) +#define BIC_pct_idle (1ULL << 63) #define BIC_GROUP_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die) #define BIC_GROUP_THERMAL_PWR (BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__ | BIC_SysWatt) #define BIC_GROUP_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_GROUP_IDLE (BIC_Busy | BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_HW_IDLE (BIC_Busy | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) +#define BIC_GROUP_SW_IDLE (BIC_Busy | BIC_cpuidle | BIC_pct_idle ) +#define BIC_GROUP_IDLE (BIC_GROUP_HW_IDLE | BIC_pct_idle) #define BIC_OTHER (BIC_IRQ | BIC_NMI | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_cpuidle) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); -unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_cpuidle | BIC_pct_idle | BIC_APIC | BIC_X2APIC; #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) #define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) @@ -2362,6 +2366,15 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } else if (!strcmp(name_list, "idle")) { retval |= BIC_GROUP_IDLE; break; + } else if (!strcmp(name_list, "swidle")) { + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "sysfs")) { /* legacy compatibility */ + retval |= BIC_GROUP_SW_IDLE; + break; + } else if (!strcmp(name_list, "hwidle")) { + retval |= BIC_GROUP_HW_IDLE; + break; } else if (!strcmp(name_list, "frequency")) { retval |= BIC_GROUP_FREQUENCY; break; @@ -2372,6 +2385,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) } if (i == MAX_BIC) { + fprintf(stderr, "deferred %s\n", name_list); if (mode == SHOW_LIST) { deferred_add_names[deferred_add_index++] = name_list; if (deferred_add_index >= MAX_DEFERRED) { @@ -10269,6 +10283,9 @@ void probe_cpuidle_residency(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_pct_idle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10291,7 +10308,7 @@ void probe_cpuidle_residency(void) sprintf(path, "cpuidle/state%d/time", state); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_pct_idle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) @@ -10315,6 +10332,9 @@ void probe_cpuidle_counts(void) int min_state = 1024, max_state = 0; char *sp; + if (!DO_BIC(BIC_cpuidle)) + return; + for (state = 10; state >= 0; --state) { sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); @@ -10327,7 +10347,7 @@ void probe_cpuidle_counts(void) remove_underbar(name_buf); - if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + if (!DO_BIC(BIC_cpuidle) && !is_deferred_add(name_buf)) continue; if (is_deferred_skip(name_buf)) -- 2.51.0 From 03e00e373cab981ad808271b2650700cfa0fbda6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 6 Apr 2025 14:49:20 -0400 Subject: [PATCH 06/16] tools/power turbostat: v2025.05.06 Support up to 8192 processors Add cpuidle governor debug telemetry, disabled by default Update default output to exclude cpuidle invocation counts Bug fixes Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab184f95cdaf..1b9fdc1a7ee8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9594,7 +9594,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2025.02.02 - Len Brown \n"); + fprintf(outf, "turbostat version 2025.04.06 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From 0efdedb3358aa78102967f242379686f94315830 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 2 Apr 2025 21:21:57 +0100 Subject: [PATCH 07/16] tools/include: make uapi/linux/types.h usable from assembly MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The "real" linux/types.h UAPI header gracefully degrades to a NOOP when included from assembly code. Mirror this behaviour in the tools/ variant. Test for __ASSEMBLER__ over __ASSEMBLY__ as the former is provided by the toolchain automatically. Reported-by: Mark Brown Closes: https://lore.kernel.org/lkml/af553c62-ca2f-4956-932c-dd6e3a126f58@sirena.org.uk/ Fixes: c9fbaa879508 ("selftests: vDSO: parse_vdso: Use UAPI headers instead of libc headers") Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20250321-uapi-consistency-v1-1-439070118dc0@linutronix.de Signed-off-by: Mark Brown Reviewed-by: Mark Brown Signed-off-by: Linus Torvalds --- tools/include/uapi/linux/types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h index 91fa51a9c31d..85aa327245c6 100644 --- a/tools/include/uapi/linux/types.h +++ b/tools/include/uapi/linux/types.h @@ -4,6 +4,8 @@ #include +#ifndef __ASSEMBLER__ + /* copied from linux:include/uapi/linux/types.h */ #define __bitwise typedef __u16 __bitwise __le16; @@ -20,4 +22,5 @@ typedef __u32 __bitwise __wsum; #define __aligned_be64 __be64 __attribute__((aligned(8))) #define __aligned_le64 __le64 __attribute__((aligned(8))) +#endif /* __ASSEMBLER__ */ #endif /* _UAPI_LINUX_TYPES_H */ -- 2.51.0 From 0af2f6be1b4281385b618cb86ad946eded089ac8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 6 Apr 2025 13:11:33 -0700 Subject: [PATCH 08/16] Linux 6.15-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e55726a71d95..38689a0c3605 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 14 +PATCHLEVEL = 15 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From ebf7547fd1df2db32b83de6f341c7bd9f0e98200 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 27 Mar 2025 12:07:07 +0100 Subject: [PATCH 09/16] mtip32xx: Remove unnecessary pcim_iounmap_regions() calls pcim_iounmap_regions() is deprecated. Moreover, it is not necessary to call it in the driver's remove() function or if probe() fails, since it does cleanup automatically on driver detach. Remove all calls to pcim_iounmap_regions(). Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Acked-by: Jens Axboe Acked-by: Mark Brown Link: https://patch.msgid.link/20250327110707.20025-3-phasta@kernel.org --- drivers/block/mtip32xx/mtip32xx.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 0d619df03fa9..66ce6b81c7d9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3717,7 +3717,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rv) { dev_warn(&pdev->dev, "64-bit DMA enable failed\n"); - goto setmask_err; + goto iomap_err; } /* Copy the info we may need later into the private data structure. */ @@ -3733,7 +3733,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); rv = -ENOMEM; - goto setmask_err; + goto iomap_err; } memset(cpu_list, 0, sizeof(cpu_list)); @@ -3830,8 +3830,6 @@ msi_initialize_err: drop_cpu(dd->work[1].cpu_binding); drop_cpu(dd->work[2].cpu_binding); } -setmask_err: - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); iomap_err: kfree(dd); @@ -3907,7 +3905,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); put_disk(dd->disk); -- 2.51.0 From 855c634930f04c21434fae9c41a7a7372b6ac879 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Thu, 27 Mar 2025 12:07:08 +0100 Subject: [PATCH 10/16] PCI: Remove pcim_iounmap_regions() All users of the deprecated function pcim_iounmap_regions() have been ported by now. Remove it. Signed-off-by: Philipp Stanner Signed-off-by: Bjorn Helgaas Reviewed-by: Zijun Hu Reviewed-by: Andy Shevchenko Link: https://patch.msgid.link/20250327110707.20025-4-phasta@kernel.org --- .../driver-api/driver-model/devres.rst | 1 - drivers/pci/devres.c | 24 ------------------- include/linux/pci.h | 1 - 3 files changed, 26 deletions(-) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index d75728eb05f8..601f1a74d34d 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -396,7 +396,6 @@ PCI pcim_iomap_regions() : do request_region() and iomap() on multiple BARs pcim_iomap_table() : array of mapped addresses indexed by BAR pcim_iounmap() : do iounmap() on a single BAR - pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs pcim_pin_device() : keep PCI device enabled after release pcim_set_mwi() : enable Memory-Write-Invalidate PCI transaction diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 73047316889e..0317c56ac27d 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -955,30 +955,6 @@ err: } EXPORT_SYMBOL(pcim_request_all_regions); -/** - * pcim_iounmap_regions - Unmap and release PCI BARs (DEPRECATED) - * @pdev: PCI device to map IO resources for - * @mask: Mask of BARs to unmap and release - * - * Unmap and release regions specified by @mask. - * - * This function is DEPRECATED. Do not use it in new code. - * Use pcim_iounmap_region() instead. - */ -void pcim_iounmap_regions(struct pci_dev *pdev, int mask) -{ - int i; - - for (i = 0; i < PCI_STD_NUM_BARS; i++) { - if (!mask_contains_bar(mask, i)) - continue; - - pcim_iounmap_region(pdev, i); - pcim_remove_bar_from_legacy_table(pdev, i); - } -} -EXPORT_SYMBOL(pcim_iounmap_regions); - /** * pcim_iomap_range - Create a ranged __iomap mapping within a PCI BAR * @pdev: PCI device to map IO resources for diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..a60fdb344d9e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2322,7 +2322,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); -- 2.51.0 From 51f6aec99cb0493b6c288ba55bed2ef9fb6242ca Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:29:55 +0200 Subject: [PATCH 11/16] PCI: Remove hybrid devres nature from request functions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit All functions based on __pci_request_region() and its release counter part support "hybrid mode", where the functions become managed if the PCI device was enabled with pcim_enable_device(). Removing this undesirable feature requires to remove all users who activated their device with that function and use one of the affected request functions. These users were: ASoC alsa cardreader cirrus i2c mmc mtd mtd mxser net spi vdpa vmwgfx all of which have been ported to always-managed pcim_ functions by now. The hybrid nature can, thus, be removed from the aforementioned PCI functions. Remove all function guards and documentation in pci.c related to the hybrid redirection. Adjust the visibility of pcim_release_region(). Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Reviewed-by: Andy Shevchenko Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20250519112959.25487-3-phasta@kernel.org --- drivers/pci/devres.c | 41 +++++++++++++---------------------------- drivers/pci/pci.c | 42 ------------------------------------------ drivers/pci/pci.h | 1 - 3 files changed, 13 insertions(+), 71 deletions(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 0317c56ac27d..dde109095428 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -6,30 +6,13 @@ /* * On the state of PCI's devres implementation: * - * The older devres API for PCI has two significant problems: - * - * 1. It is very strongly tied to the statically allocated mapping table in - * struct pcim_iomap_devres below. This is mostly solved in the sense of the - * pcim_ functions in this file providing things like ranged mapping by - * bypassing this table, whereas the functions that were present in the old - * API still enter the mapping addresses into the table for users of the old - * API. - * - * 2. The region-request-functions in pci.c do become managed IF the device has - * been enabled with pcim_enable_device() instead of pci_enable_device(). - * This resulted in the API becoming inconsistent: Some functions have an - * obviously managed counter-part (e.g., pci_iomap() <-> pcim_iomap()), - * whereas some don't and are never managed, while others don't and are - * _sometimes_ managed (e.g. pci_request_region()). - * - * Consequently, in the new API, region requests performed by the pcim_ - * functions are automatically cleaned up through the devres callback - * pcim_addr_resource_release(). - * - * Users of pcim_enable_device() + pci_*region*() are redirected in - * pci.c to the managed functions here in this file. This isn't exactly - * perfect, but the only alternative way would be to port ALL drivers - * using said combination to pcim_ functions. + * The older PCI devres API has one significant problem: + * + * It is very strongly tied to the statically allocated mapping table in struct + * pcim_iomap_devres below. This is mostly solved in the sense of the pcim_ + * functions in this file providing things like ranged mapping by bypassing + * this table, whereas the functions that were present in the old API still + * enter the mapping addresses into the table for users of the old API. * * TODO: * Remove the legacy table entirely once all calls to pcim_iomap_table() in @@ -89,10 +72,12 @@ static inline void pcim_addr_devres_clear(struct pcim_addr_devres *res) /* * The following functions, __pcim_*_region*, exist as counterparts to the - * versions from pci.c - which, unfortunately, can be in "hybrid mode", i.e., - * sometimes managed, sometimes not. + * versions from pci.c - which, unfortunately, were in the past in "hybrid + * mode", i.e., sometimes managed, sometimes not. + * + * To separate the APIs cleanly, we defined our own, simplified versions here. * - * To separate the APIs cleanly, we define our own, simplified versions here. + * TODO: unify those functions with the counterparts in pci.c */ /** @@ -893,7 +878,7 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *nam * Release a region manually that was previously requested by * pcim_request_region(). */ -void pcim_release_region(struct pci_dev *pdev, int bar) +static void pcim_release_region(struct pci_dev *pdev, int bar) { struct pcim_addr_devres res_searched; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4d7c9f64ea24..fe241fabe0f3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3937,16 +3937,6 @@ void pci_release_region(struct pci_dev *pdev, int bar) if (!pci_bar_index_is_valid(bar)) return; - /* - * This is done for backwards compatibility, because the old PCI devres - * API had a mode in which the function became managed if it had been - * enabled with pcim_enable_device() instead of pci_enable_device(). - */ - if (pci_is_managed(pdev)) { - pcim_release_region(pdev, bar); - return; - } - if (pci_resource_len(pdev, bar) == 0) return; if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) @@ -3984,13 +3974,6 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, if (!pci_bar_index_is_valid(bar)) return -EINVAL; - if (pci_is_managed(pdev)) { - if (exclusive == IORESOURCE_EXCLUSIVE) - return pcim_request_region_exclusive(pdev, bar, name); - - return pcim_request_region(pdev, bar, name); - } - if (pci_resource_len(pdev, bar) == 0) return 0; @@ -4027,11 +4010,6 @@ err_out: * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_region(struct pci_dev *pdev, int bar, const char *name) { @@ -4084,11 +4062,6 @@ err_out: * @name: Name of the driver requesting the resources * * Returns: 0 on success, negative error code on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_selected_regions(struct pci_dev *pdev, int bars, const char *name) @@ -4104,11 +4077,6 @@ EXPORT_SYMBOL(pci_request_selected_regions); * @name: name of the driver requesting the resources * * Returns: 0 on success, negative error code on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars, const char *name) @@ -4144,11 +4112,6 @@ EXPORT_SYMBOL(pci_release_regions); * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_regions(struct pci_dev *pdev, const char *name) { @@ -4173,11 +4136,6 @@ EXPORT_SYMBOL(pci_request_regions); * * Returns 0 on success, or %EBUSY on error. A warning message is also * printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *name) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b81e99cd4b62..8c3e5fb2443a 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1062,7 +1062,6 @@ static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) int pcim_intx(struct pci_dev *dev, int enable); int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name); -void pcim_release_region(struct pci_dev *pdev, int bar); /* * Config Address for PCI Configuration Mechanism #1 -- 2.51.0 From b4fb90fb930193bf391b0fd8e9e4fa63b347e3ed Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:29:56 +0200 Subject: [PATCH 12/16] Documentation/driver-api: Update pcim_enable_device() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit pcim_enable_device() is not related anymore to switching the mode of operation of any functions. It merely sets up a devres callback for automatically disabling the PCI device on driver detach. Adjust the function's documentation. Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250519112959.25487-4-phasta@kernel.org --- Documentation/driver-api/driver-model/devres.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index 601f1a74d34d..3d56f94ac2ee 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -391,7 +391,7 @@ PCI devm_pci_remap_cfgspace() : ioremap PCI configuration space devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource - pcim_enable_device() : after success, some PCI ops become managed + pcim_enable_device() : after success, the PCI device gets disabled automatically on driver detach pcim_iomap() : do iomap() on a single BAR pcim_iomap_regions() : do request_region() and iomap() on multiple BARs pcim_iomap_table() : array of mapped addresses indexed by BAR -- 2.51.0 From 8e9987485d9ae233fdd5b3f8e6857d28bcfe6a04 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:29:57 +0200 Subject: [PATCH 13/16] PCI: Remove pcim_request_region_exclusive() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit pcim_request_region_exclusive() was only needed for redirecting the relatively exotic exclusive request functions in pci.c in case of them operating in managed mode. The managed nature has been removed from those functions and no one else uses pcim_request_region_exclusive(). Remove pcim_request_region_exclusive(). Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Reviewed-by: Andy Shevchenko Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20250519112959.25487-5-phasta@kernel.org --- drivers/pci/devres.c | 18 ------------------ drivers/pci/pci.h | 2 -- 2 files changed, 20 deletions(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index dde109095428..9f74cfd025fc 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -852,24 +852,6 @@ int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) } EXPORT_SYMBOL(pcim_request_region); -/** - * pcim_request_region_exclusive - Request a PCI BAR exclusively - * @pdev: PCI device to request region for - * @bar: Index of BAR to request - * @name: Name of the driver requesting the resource - * - * Returns: 0 on success, a negative error code on failure. - * - * Request region specified by @bar exclusively. - * - * The region will automatically be released on driver detach. If desired, - * release manually only with pcim_release_region(). - */ -int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name) -{ - return _pcim_request_region(pdev, bar, name, IORESOURCE_EXCLUSIVE); -} - /** * pcim_release_region - Release a PCI BAR * @pdev: PCI device to operate on diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 8c3e5fb2443a..cfc9e71a4d84 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1060,8 +1060,6 @@ static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) #endif int pcim_intx(struct pci_dev *dev, int enable); -int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, - const char *name); /* * Config Address for PCI Configuration Mechanism #1 -- 2.51.0 From 85826c11e77bb8bd18fbdae9bd04e51b4711c6f7 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:29:58 +0200 Subject: [PATCH 14/16] PCI: Remove exclusive requests flags from _pcim_request_region() MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit pcim_request_region_exclusive(), the only user in PCI devres that needed exclusive region requests, has been removed. All features related to exclusive requests can, therefore, be removed, too. Remove them. Signed-off-by: Philipp Stanner [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20250519112959.25487-6-phasta@kernel.org --- drivers/pci/devres.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 9f74cfd025fc..c8713571a7b8 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -808,8 +808,20 @@ err: } EXPORT_SYMBOL(pcim_iomap_regions); -static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, - int request_flags) +/** + * pcim_request_region - Request a PCI BAR + * @pdev: PCI device to request region for + * @bar: Index of BAR to request + * @name: Name of the driver requesting the resource + * + * Returns: 0 on success, a negative error code on failure. + * + * Request region specified by @bar. + * + * The region will automatically be released on driver detach. If desired, + * release manually only with pcim_release_region(). + */ +int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) { int ret; struct pcim_addr_devres *res; @@ -823,7 +835,7 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, res->type = PCIM_ADDR_DEVRES_TYPE_REGION; res->bar = bar; - ret = __pcim_request_region(pdev, bar, name, request_flags); + ret = __pcim_request_region(pdev, bar, name, 0); if (ret != 0) { pcim_addr_devres_free(res); return ret; @@ -832,24 +844,6 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, devres_add(&pdev->dev, res); return 0; } - -/** - * pcim_request_region - Request a PCI BAR - * @pdev: PCI device to request region for - * @bar: Index of BAR to request - * @name: Name of the driver requesting the resource - * - * Returns: 0 on success, a negative error code on failure. - * - * Request region specified by @bar. - * - * The region will automatically be released on driver detach. If desired, - * release manually only with pcim_release_region(). - */ -int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) -{ - return _pcim_request_region(pdev, bar, name, 0); -} EXPORT_SYMBOL(pcim_request_region); /** -- 2.51.0 From bcfc67157e413cbb41d363f8ee961a01130d0a73 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:29:59 +0200 Subject: [PATCH 15/16] PCI: Remove redundant set of request functions MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When the demangling of the hybrid devres functions within PCI was implemented, it was necessary to implement several PCI functions a second time to avoid cyclic calls, since the hybrid functions in pci.c call the managed functions in devres.c, which in turn can be directly used outside of PCI and needed request infrastructure, too. Therefore, __pcim_request_region_range(), __pci_release_region_range() and wrappers around them were implemented. The hybrid nature has recently been removed from all functions in pci.c. Therefore, the functions in devres.c can now directly use their counterparts in pci.c without causing a call-cycle. Remove __pcim_request_region_range(), __pcim_request_region_range() and the wrappers. Use the corresponding request functions from pci.c in devres.c Signed-off-by: Philipp Stanner Signed-off-by: Krzysztof Wilczyński Reviewed-by: Andy Shevchenko Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20250519112959.25487-7-phasta@kernel.org --- drivers/pci/devres.c | 110 ++----------------------------------------- 1 file changed, 5 insertions(+), 105 deletions(-) diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index c8713571a7b8..9f4190501395 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -70,106 +70,6 @@ static inline void pcim_addr_devres_clear(struct pcim_addr_devres *res) res->bar = -1; } -/* - * The following functions, __pcim_*_region*, exist as counterparts to the - * versions from pci.c - which, unfortunately, were in the past in "hybrid - * mode", i.e., sometimes managed, sometimes not. - * - * To separate the APIs cleanly, we defined our own, simplified versions here. - * - * TODO: unify those functions with the counterparts in pci.c - */ - -/** - * __pcim_request_region_range - Request a ranged region - * @pdev: PCI device the region belongs to - * @bar: BAR the range is within - * @offset: offset from the BAR's start address - * @maxlen: length in bytes, beginning at @offset - * @name: name of the driver requesting the resource - * @req_flags: flags for the request, e.g., for kernel-exclusive requests - * - * Returns: 0 on success, a negative error code on failure. - * - * Request a range within a device's PCI BAR. Sanity check the input. - */ -static int __pcim_request_region_range(struct pci_dev *pdev, int bar, - unsigned long offset, - unsigned long maxlen, - const char *name, int req_flags) -{ - resource_size_t start = pci_resource_start(pdev, bar); - resource_size_t len = pci_resource_len(pdev, bar); - unsigned long dev_flags = pci_resource_flags(pdev, bar); - - if (start == 0 || len == 0) /* Unused BAR. */ - return 0; - if (len <= offset) - return -EINVAL; - - start += offset; - len -= offset; - - if (len > maxlen && maxlen != 0) - len = maxlen; - - if (dev_flags & IORESOURCE_IO) { - if (!request_region(start, len, name)) - return -EBUSY; - } else if (dev_flags & IORESOURCE_MEM) { - if (!__request_mem_region(start, len, name, req_flags)) - return -EBUSY; - } else { - /* That's not a device we can request anything on. */ - return -ENODEV; - } - - return 0; -} - -static void __pcim_release_region_range(struct pci_dev *pdev, int bar, - unsigned long offset, - unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(pdev, bar); - resource_size_t len = pci_resource_len(pdev, bar); - unsigned long flags = pci_resource_flags(pdev, bar); - - if (len <= offset || start == 0) - return; - - if (len == 0 || maxlen == 0) /* This an unused BAR. Do nothing. */ - return; - - start += offset; - len -= offset; - - if (len > maxlen) - len = maxlen; - - if (flags & IORESOURCE_IO) - release_region(start, len); - else if (flags & IORESOURCE_MEM) - release_mem_region(start, len); -} - -static int __pcim_request_region(struct pci_dev *pdev, int bar, - const char *name, int flags) -{ - unsigned long offset = 0; - unsigned long len = pci_resource_len(pdev, bar); - - return __pcim_request_region_range(pdev, bar, offset, len, name, flags); -} - -static void __pcim_release_region(struct pci_dev *pdev, int bar) -{ - unsigned long offset = 0; - unsigned long len = pci_resource_len(pdev, bar); - - __pcim_release_region_range(pdev, bar, offset, len); -} - static void pcim_addr_resource_release(struct device *dev, void *resource_raw) { struct pci_dev *pdev = to_pci_dev(dev); @@ -177,11 +77,11 @@ static void pcim_addr_resource_release(struct device *dev, void *resource_raw) switch (res->type) { case PCIM_ADDR_DEVRES_TYPE_REGION: - __pcim_release_region(pdev, res->bar); + pci_release_region(pdev, res->bar); break; case PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING: pci_iounmap(pdev, res->baseaddr); - __pcim_release_region(pdev, res->bar); + pci_release_region(pdev, res->bar); break; case PCIM_ADDR_DEVRES_TYPE_MAPPING: pci_iounmap(pdev, res->baseaddr); @@ -720,7 +620,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, res->type = PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING; res->bar = bar; - ret = __pcim_request_region(pdev, bar, name, 0); + ret = pci_request_region(pdev, bar, name); if (ret != 0) goto err_region; @@ -734,7 +634,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, return res->baseaddr; err_iomap: - __pcim_release_region(pdev, bar); + pci_release_region(pdev, bar); err_region: pcim_addr_devres_free(res); @@ -835,7 +735,7 @@ int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) res->type = PCIM_ADDR_DEVRES_TYPE_REGION; res->bar = bar; - ret = __pcim_request_region(pdev, bar, name, 0); + ret = pci_request_region(pdev, bar, name); if (ret != 0) { pcim_addr_devres_free(res); return ret; -- 2.51.0 From 90ffe1f093e85ebb70212909607e0c0517aa65a5 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Mon, 19 May 2025 13:30:00 +0200 Subject: [PATCH 16/16] PCI: Remove hybrid-devres usage warnings from kernel-doc MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit pci/iomap.c still contains warnings about those functions not behaving in a managed manner if pcim_enable_device() was called. Since all hybrid behavior that users could know about has been removed by now, those explicit warnings are no longer necessary. Remove the hybrid-devres usage warnings from the kernel-doc. Signed-off-by: Philipp Stanner [kwilczynski: commit log] Signed-off-by: Krzysztof Wilczyński Reviewed-by: Andy Shevchenko Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/20250519112959.25487-8-phasta@kernel.org --- drivers/pci/iomap.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/pci/iomap.c b/drivers/pci/iomap.c index fe706ed946df..ea86c282a386 100644 --- a/drivers/pci/iomap.c +++ b/drivers/pci/iomap.c @@ -25,10 +25,6 @@ * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR from offset to the end, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). * */ void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, @@ -76,10 +72,6 @@ EXPORT_SYMBOL(pci_iomap_range); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR from offset to the end, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). * */ void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, @@ -127,10 +119,6 @@ EXPORT_SYMBOL_GPL(pci_iomap_wc_range); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR without checking for its length first, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). If you need automatic cleanup, use pcim_iomap(). * */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { @@ -152,10 +140,6 @@ EXPORT_SYMBOL(pci_iomap); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR without checking for its length first, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). * */ void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen) { -- 2.51.0