From e5f687b89bc2a892256f48e14a568970b59a4812 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 2 Oct 2024 15:05:15 +0200 Subject: [PATCH 01/16] tools/power turbostat: Add RAPL psys as a built-in counter Introduce the counter as a part of global, platform counters structure. We open the counter for only one cpu, but otherwise treat it as an ordinary RAPL counter, allowing for grouped perf read. The counter is disabled by default, because it's interpretation may require additional, platform specific information, making it unsuitable for general use. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 + tools/power/x86/turbostat/turbostat.c | 93 ++++++++++++++++++++++++--- 2 files changed, 85 insertions(+), 10 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 56c7ff6efcda..95eb02346d3a 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -190,6 +190,8 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. May require platform specific information to interpret the data, making it not suitable for general use. +.PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9025c2945737..88c7f896c5b2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -200,6 +200,8 @@ struct msr_counter bic[] = { { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "SysWatt", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Sys_J", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -262,6 +264,8 @@ struct msr_counter bic[] = { #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) #define BIC_Diec6 (1ULL << 58) +#define BIC_SysWatt (1ULL << 59) +#define BIC_Sys_J (1ULL << 60) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) @@ -269,7 +273,7 @@ struct msr_counter bic[] = { #define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) -#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC | BIC_SysWatt | BIC_Sys_J) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; @@ -507,12 +511,15 @@ enum rapl_msrs { RAPL_AMD_PWR_UNIT = BIT(14), /* 0xc0010299 MSR_AMD_RAPL_POWER_UNIT */ RAPL_AMD_CORE_ENERGY_STAT = BIT(15), /* 0xc001029a MSR_AMD_CORE_ENERGY_STATUS */ RAPL_AMD_PKG_ENERGY_STAT = BIT(16), /* 0xc001029b MSR_AMD_PKG_ENERGY_STATUS */ + RAPL_PLATFORM_ENERGY_LIMIT = BIT(17), /* 0x64c MSR_PLATFORM_ENERGY_LIMIT */ + RAPL_PLATFORM_ENERGY_STATUS = BIT(18), /* 0x64d MSR_PLATFORM_ENERGY_STATUS */ }; #define RAPL_PKG (RAPL_PKG_ENERGY_STATUS | RAPL_PKG_POWER_LIMIT) #define RAPL_DRAM (RAPL_DRAM_ENERGY_STATUS | RAPL_DRAM_POWER_LIMIT) #define RAPL_CORE (RAPL_CORE_ENERGY_STATUS | RAPL_CORE_POWER_LIMIT) #define RAPL_GFX (RAPL_GFX_POWER_LIMIT | RAPL_GFX_ENERGY_STATUS) +#define RAPL_PSYS (RAPL_PLATFORM_ENERGY_STATUS | RAPL_PLATFORM_ENERGY_LIMIT) #define RAPL_PKG_ALL (RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO) #define RAPL_DRAM_ALL (RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_DRAM_POWER_INFO) @@ -713,7 +720,7 @@ static const struct platform_features skl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -730,7 +737,7 @@ static const struct platform_features cnl_features = { .has_ext_cst_msrs = 1, .trl_msrs = TRL_BASE, .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, + .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX | RAPL_PSYS, .enable_tsc_tweak = 1, }; @@ -797,7 +804,7 @@ static const struct platform_features icx_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, .has_fixed_rapl_unit = 1, }; @@ -813,7 +820,7 @@ static const struct platform_features spr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features srf_features = { @@ -829,7 +836,7 @@ static const struct platform_features srf_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features grr_features = { @@ -845,7 +852,7 @@ static const struct platform_features grr_features = { .has_irtl_msrs = 1, .has_cst_prewake_bit = 1, .trl_msrs = TRL_BASE | TRL_CORECOUNT, - .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL, + .rapl_msrs = RAPL_PKG_ALL | RAPL_DRAM_ALL | RAPL_PSYS, }; static const struct platform_features slv_features = { @@ -1108,6 +1115,7 @@ enum rapl_rci_index { RAPL_RCI_INDEX_PKG_PERF_STATUS = 4, RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5, RAPL_RCI_INDEX_CORE_ENERGY = 6, + RAPL_RCI_INDEX_ENERGY_PLATFORM = 7, NUM_RAPL_COUNTERS, }; @@ -1134,6 +1142,7 @@ struct rapl_counter_info_t { struct rapl_counter_info_t *rapl_counter_info_perdomain; unsigned int rapl_counter_info_perdomain_size; +#define RAPL_COUNTER_FLAG_PLATFORM_COUNTER (1u << 0) #define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1) struct rapl_counter_arch_info { @@ -1255,6 +1264,19 @@ static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = { .compat_scale = 1.0, .flags = 0, }, + { + .feature_mask = RAPL_PSYS, + .perf_subsys = "power", + .perf_name = "energy-psys", + .msr = MSR_PLATFORM_ENERGY_STATUS, + .msr_mask = 0x00000000FFFFFFFF, + .msr_shift = 0, + .platform_rapl_msr_scale = &rapl_energy_units, + .rci_index = RAPL_RCI_INDEX_ENERGY_PLATFORM, + .bic = BIC_SysWatt | BIC_Sys_J, + .compat_scale = 1.0, + .flags = RAPL_COUNTER_FLAG_PLATFORM_COUNTER | RAPL_COUNTER_FLAG_USE_MSR_SUM, + }, }; struct rapl_counter { @@ -1682,6 +1704,7 @@ enum { IDX_PP1_ENERGY, IDX_PKG_PERF, IDX_DRAM_PERF, + IDX_PSYS_ENERGY, IDX_COUNT, }; @@ -1726,6 +1749,9 @@ off_t idx_to_offset(int idx) case IDX_DRAM_PERF: offset = MSR_DRAM_PERF_STATUS; break; + case IDX_PSYS_ENERGY: + offset = MSR_PLATFORM_ENERGY_STATUS; + break; default: offset = -1; } @@ -1756,6 +1782,9 @@ int offset_to_idx(off_t offset) case MSR_DRAM_PERF_STATUS: idx = IDX_DRAM_PERF; break; + case MSR_PLATFORM_ENERGY_STATUS: + idx = IDX_PSYS_ENERGY; + break; default: idx = -1; } @@ -1777,6 +1806,8 @@ int idx_valid(int idx) return platform->rapl_msrs & RAPL_PKG_PERF_STATUS; case IDX_DRAM_PERF: return platform->rapl_msrs & RAPL_DRAM_PERF_STATUS; + case IDX_PSYS_ENERGY: + return platform->rapl_msrs & RAPL_PSYS; default: return 0; } @@ -1848,6 +1879,10 @@ struct system_summary { struct pkg_data packages; } average; +struct platform_counters { + struct rapl_counter energy_psys; /* MSR_PLATFORM_ENERGY_STATUS */ +} platform_counters_odd, platform_counters_even; + struct cpu_topology { int physical_package_id; int die_id; @@ -2512,6 +2547,11 @@ void print_header(char *delim) ppmt = ppmt->next; } + if (DO_BIC(BIC_SysWatt)) + outp += sprintf(outp, "%sSysWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_Sys_J)) + outp += sprintf(outp, "%sSys_J", (printed++ ? delim : "")); + outp += sprintf(outp, "\n"); } @@ -2519,6 +2559,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p { int i; struct msr_counter *mp; + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); @@ -2590,6 +2631,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value); outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value); outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value); + outp += sprintf(outp, "Joules PSYS: %0llX\n", pplat_cnt->energy_psys.raw_value); outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value); outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -2628,6 +2670,9 @@ double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desir */ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { + static int count; + + struct platform_counters *pplat_cnt = NULL; double interval_float, tsc; char *fmt8; int i; @@ -2637,6 +2682,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *delim = "\t"; int printed = 0; + if (t == &average.threads) { + pplat_cnt = count & 1 ? &platform_counters_odd : &platform_counters_even; + ++count; + } + /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3093,6 +3143,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + if (DO_BIC(BIC_SysWatt) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_WATTS, interval_float)); + if (DO_BIC(BIC_Sys_J) && (t == &average.threads)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), + rapl_counter_get_value(&pplat_cnt->energy_psys, RAPL_UNIT_JOULES, interval_float)); + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -3400,6 +3457,11 @@ int delta_cpu(struct thread_data *t, struct core_data *c, return retval; } +void delta_platform(struct platform_counters *new, struct platform_counters *old) +{ + old->energy_psys.raw_value = new->energy_psys.raw_value - old->energy_psys.raw_value; +} + void rapl_counter_clear(struct rapl_counter *c) { c->raw_value = 0; @@ -4129,6 +4191,9 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx) { + if (rci->source[idx] == COUNTER_SOURCE_NONE) + return; + rc->raw_value = rci->data[idx]; rc->unit = rci->unit[idx]; rc->scale = rci->scale[idx]; @@ -4136,6 +4201,7 @@ void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct pkg_data *p) { + struct platform_counters *pplat_cnt = p == package_odd ? &platform_counters_odd : &platform_counters_even; unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; @@ -4163,6 +4229,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { case COUNTER_SOURCE_NONE: + rci->data[i] = 0; break; case COUNTER_SOURCE_PERF: @@ -4201,7 +4268,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct } } - BUILD_BUG_ON(NUM_RAPL_COUNTERS != 7); + BUILD_BUG_ON(NUM_RAPL_COUNTERS != 8); write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG); write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES); write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM); @@ -4209,6 +4276,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS); write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS); write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY); + write_rapl_counter(&pplat_cnt->energy_psys, rci, RAPL_RCI_INDEX_ENERGY_PLATFORM); return 0; } @@ -6144,6 +6212,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_odd, &platform_counters_even); compute_average(EVEN_COUNTERS); format_all_counters(EVEN_COUNTERS); flush_output_stdout(); @@ -6167,6 +6236,7 @@ restart: re_initialize(); goto restart; } + delta_platform(&platform_counters_even, &platform_counters_odd); compute_average(ODD_COUNTERS); format_all_counters(ODD_COUNTERS); flush_output_stdout(); @@ -6945,8 +7015,8 @@ void rapl_probe_intel(void) unsigned long long msr; unsigned int time_unit; double tdp; - const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; - const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; + const unsigned long long bic_watt_bits = BIC_SysWatt | BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt; + const unsigned long long bic_joules_bits = BIC_Sys_J | BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J; if (rapl_joules) bic_enabled &= ~bic_watt_bits; @@ -7606,6 +7676,9 @@ void rapl_perf_init(void) domain_visited[next_domain] = 1; + if ((cai->flags & RAPL_COUNTER_FLAG_PLATFORM_COUNTER) && (cpu != base_cpu)) + continue; + struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain]; /* Check if the counter is enabled and accessible */ -- 2.51.0 From 86d237734091201d2ab2c1d2e1063893621c770f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 30 Nov 2024 16:22:00 -0500 Subject: [PATCH 02/16] tools/power turbostat: 2024.11.30 since 2024.07.26: assorted minor bug fixes assorted platform specific tweaks initial RAPL PSYS (SysWatt) support Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- tools/power/x86/turbostat/turbostat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 95eb02346d3a..a7f7ed01421c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -190,7 +190,7 @@ The system configuration dump (if --quiet is not used) is followed by statistics .PP \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. .PP -\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. May require platform specific information to interpret the data, making it not suitable for general use. +\fBSysWatt\fP Watts consumed by the whole platform (RAPL PSYS). Disabled by default. Enable with --enable SysWatt. .PP \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. .PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 88c7f896c5b2..58a487c225a7 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9236,7 +9236,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.07.26 - Len Brown \n"); + fprintf(outf, "turbostat version 2024.11.30 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 -- 2.51.0 From f69e63756f7822fcdad8a34f9967e8b243e883ee Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 2 Oct 2024 18:31:47 +0100 Subject: [PATCH 03/16] printf: Remove unused 'bprintf' bprintf() is unused. Remove it. It was added in the commit 4370aa4aa753 ("vsprintf: add binary printf") but as far as I can see was never used, unlike the other two functions in that patch. Link: https://lore.kernel.org/20241002173147.210107-1-linux@treblig.org Reviewed-by: Andy Shevchenko Acked-by: Petr Mladek Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Steven Rostedt (Google) --- include/linux/string.h | 1 - lib/vsprintf.c | 23 ----------------------- 2 files changed, 24 deletions(-) diff --git a/include/linux/string.h b/include/linux/string.h index 0dd27afcfaf7..493ac4862c77 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -335,7 +335,6 @@ int __sysfs_match_string(const char * const *array, size_t n, const char *s); #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); -int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); #endif extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6ac02bbb7df1..9d3dac38a3f4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -3428,29 +3428,6 @@ out: } EXPORT_SYMBOL_GPL(bstr_printf); -/** - * bprintf - Parse a format string and place args' binary value in a buffer - * @bin_buf: The buffer to place args' binary value - * @size: The size of the buffer(by words(32bits), not characters) - * @fmt: The format string to use - * @...: Arguments for the format string - * - * The function returns the number of words(u32) written - * into @bin_buf. - */ -int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) -{ - va_list args; - int ret; - - va_start(args, fmt); - ret = vbin_printf(bin_buf, size, fmt, args); - va_end(args); - - return ret; -} -EXPORT_SYMBOL_GPL(bprintf); - #endif /* CONFIG_BINARY_PRINTF */ /** -- 2.51.0 From 9022ed0e7e65734d83a0648648589b9fbea8e8c9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Dec 2024 09:23:33 -0800 Subject: [PATCH 04/16] strscpy: write destination buffer only once The point behind strscpy() was to once and for all avoid all the problems with 'strncpy()' and later broken "fixed" versions like strlcpy() that just made things worse. So strscpy not only guarantees NUL-termination (unlike strncpy), it also doesn't do unnecessary padding at the destination. But at the same time also avoids byte-at-a-time reads and writes by _allowing_ some extra NUL writes - within the size, of course - so that the whole copy can be done with word operations. It is also stable in the face of a mutable source string: it explicitly does not read the source buffer multiple times (so an implementation using "strnlen()+memcpy()" would be wrong), and does not read the source buffer past the size (like the mis-design that is strlcpy does). Finally, the return value is designed to be simple and unambiguous: if the string cannot be copied fully, it returns an actual negative error, making error handling clearer and simpler (and the caller already knows the size of the buffer). Otherwise it returns the string length of the result. However, there was one final stability issue that can be important to callers: the stability of the destination buffer. In particular, the same way we shouldn't read the source buffer more than once, we should avoid doing multiple writes to the destination buffer: first writing a potentially non-terminated string, and then terminating it with NUL at the end does not result in a stable result buffer. Yes, it gives the right result in the end, but if the rule for the destination buffer was that it is _always_ NUL-terminated even when accessed concurrently with updates, the final byte of the buffer needs to always _stay_ as a NUL byte. [ Note that "final byte is NUL" here is literally about the final byte in the destination array, not the terminating NUL at the end of the string itself. There is no attempt to try to make concurrent reads and writes give any kind of consistent string length or contents, but we do want to guarantee that there is always at least that final terminating NUL character at the end of the destination array if it existed before ] This is relevant in the kernel for the tsk->comm[] array, for example. Even without locking (for either readers or writers), we want to know that while the buffer contents may be garbled, it is always a valid C string and always has a NUL character at 'comm[TASK_COMM_LEN-1]' (and never has any "out of thin air" data). So avoid any "copy possibly non-terminated string, and terminate later" behavior, and write the destination buffer only once. Signed-off-by: Linus Torvalds --- lib/string.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/string.c b/lib/string.c index 76327b51e36f..eb4486ed40d2 100644 --- a/lib/string.c +++ b/lib/string.c @@ -104,6 +104,12 @@ char *strncpy(char *dest, const char *src, size_t count) EXPORT_SYMBOL(strncpy); #endif +#ifdef __BIG_ENDIAN +# define ALLBUTLAST_BYTE_MASK (~255ul) +#else +# define ALLBUTLAST_BYTE_MASK (~0ul >> 8) +#endif + ssize_t sized_strscpy(char *dest, const char *src, size_t count) { const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS; @@ -147,13 +153,18 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) *(unsigned long *)(dest+res) = c & zero_bytemask(data); return res + find_zero(data); } + count -= sizeof(unsigned long); + if (unlikely(!count)) { + c &= ALLBUTLAST_BYTE_MASK; + *(unsigned long *)(dest+res) = c; + return -E2BIG; + } *(unsigned long *)(dest+res) = c; res += sizeof(unsigned long); - count -= sizeof(unsigned long); max -= sizeof(unsigned long); } - while (count) { + while (count > 1) { char c; c = src[res]; @@ -164,11 +175,11 @@ ssize_t sized_strscpy(char *dest, const char *src, size_t count) count--; } - /* Hit buffer length without finding a NUL; force NUL-termination. */ - if (res) - dest[res-1] = '\0'; + /* Force NUL-termination. */ + dest[res] = '\0'; - return -E2BIG; + /* Return E2BIG if the source didn't stop */ + return src[res] ? -E2BIG : res; } EXPORT_SYMBOL(sized_strscpy); -- 2.51.0 From 40384c840ea1944d7c5a392e8975ed088ecf0b37 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Dec 2024 14:28:56 -0800 Subject: [PATCH 05/16] Linux 6.13-rc1 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index e34a97473fb6..93ab62cef244 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 -PATCHLEVEL = 12 +PATCHLEVEL = 13 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -rc1 NAME = Baby Opossum Posse # *DOCUMENTATION* -- 2.51.0 From 9986ce65bebb9a07a080d1e6ce04926b4c49ff17 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 15 Nov 2024 22:08:55 +0100 Subject: [PATCH 06/16] ata: Constify struct pci_device_id 'struct pci_device_id' is not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 4245 1454 4 5703 1647 drivers/ata/ata_generic.o After: ===== text data bss dec hex filename 4725 974 4 5703 1647 drivers/ata/ata_generic.o Signed-off-by: Christophe JAILLET Reviewed-by: Niklas Cassel Reviewed-by: Sergey Shtylyov Link: https://lore.kernel.org/r/8bddfee7f6f0f90eeb6da7156e30ab3bd553deb1.1731704917.git.christophe.jaillet@wanadoo.fr Signed-off-by: Niklas Cassel --- drivers/ata/ata_generic.c | 2 +- drivers/ata/pata_atp867x.c | 2 +- drivers/ata/pata_piccolo.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c index 2f57ec00ab82..e70b6c089cf1 100644 --- a/drivers/ata/ata_generic.c +++ b/drivers/ata/ata_generic.c @@ -209,7 +209,7 @@ static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id return ata_pci_bmdma_init_one(dev, ppi, &generic_sht, (void *)id, 0); } -static struct pci_device_id ata_generic[] = { +static const struct pci_device_id ata_generic[] = { { PCI_DEVICE(PCI_VENDOR_ID_PCTECH, PCI_DEVICE_ID_PCTECH_SAMURAI_IDE), }, { PCI_DEVICE(PCI_VENDOR_ID_HOLTEK, PCI_DEVICE_ID_HOLTEK_6565), }, { PCI_DEVICE(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8673F), }, diff --git a/drivers/ata/pata_atp867x.c b/drivers/ata/pata_atp867x.c index aaef5924f636..308f86f9e2f0 100644 --- a/drivers/ata/pata_atp867x.c +++ b/drivers/ata/pata_atp867x.c @@ -525,7 +525,7 @@ static int atp867x_reinit_one(struct pci_dev *pdev) } #endif -static struct pci_device_id atp867x_pci_tbl[] = { +static const struct pci_device_id atp867x_pci_tbl[] = { { PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP867A), 0 }, { PCI_VDEVICE(ARTOP, PCI_DEVICE_ID_ARTOP_ATP867B), 0 }, { }, diff --git a/drivers/ata/pata_piccolo.c b/drivers/ata/pata_piccolo.c index ced906bf56be..beb53bd990be 100644 --- a/drivers/ata/pata_piccolo.c +++ b/drivers/ata/pata_piccolo.c @@ -97,7 +97,7 @@ static int ata_tosh_init_one(struct pci_dev *dev, const struct pci_device_id *id return ata_pci_bmdma_init_one(dev, ppi, &tosh_sht, NULL, 0); } -static struct pci_device_id ata_tosh[] = { +static const struct pci_device_id ata_tosh[] = { { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1), }, { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2), }, { PCI_DEVICE(PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_3), }, -- 2.51.0 From a7f08ca7df9260bdcda8e3931c0680c24e02be0b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 11 Dec 2024 01:12:01 +0000 Subject: [PATCH 07/16] ata: sata_gemini: Remove unused gemini_sata_reset_bridge() gemini_sata_reset_bridge() was added in 2017 by the initial commit be4e456ed3a5 ("ata: Add driver for Faraday Technology FTIDE010") but has never been used. Remove it. Signed-off-by: Dr. David Alan Gilbert Signed-off-by: Damien Le Moal --- drivers/ata/sata_gemini.c | 12 ------------ drivers/ata/sata_gemini.h | 1 - 2 files changed, 13 deletions(-) diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index d040799bf9cb..821ca31effe7 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -224,18 +224,6 @@ void gemini_sata_stop_bridge(struct sata_gemini *sg, unsigned int bridge) } EXPORT_SYMBOL(gemini_sata_stop_bridge); -int gemini_sata_reset_bridge(struct sata_gemini *sg, - unsigned int bridge) -{ - if (bridge == 0) - reset_control_reset(sg->sata0_reset); - else - reset_control_reset(sg->sata1_reset); - msleep(10); - return gemini_sata_setup_bridge(sg, bridge); -} -EXPORT_SYMBOL(gemini_sata_reset_bridge); - static int gemini_sata_bridge_init(struct sata_gemini *sg) { struct device *dev = sg->dev; diff --git a/drivers/ata/sata_gemini.h b/drivers/ata/sata_gemini.h index 6f6e691d6007..b6e4a5c86e01 100644 --- a/drivers/ata/sata_gemini.h +++ b/drivers/ata/sata_gemini.h @@ -17,6 +17,5 @@ bool gemini_sata_bridge_enabled(struct sata_gemini *sg, bool is_ata1); enum gemini_muxmode gemini_sata_get_muxmode(struct sata_gemini *sg); int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge); void gemini_sata_stop_bridge(struct sata_gemini *sg, unsigned int bridge); -int gemini_sata_reset_bridge(struct sata_gemini *sg, unsigned int bridge); #endif -- 2.51.0 From 7b64859fde26ea4bb662db7401c8ebec5ac7f8b5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 11 Dec 2024 23:52:50 +0000 Subject: [PATCH 08/16] ata: sata_gemini: Remove remaining reset glue Now that gemini_sata_reset_bridge() is gone, we can remove the sata0/1_reset members and the code that creates them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Linus Walleij Signed-off-by: Damien Le Moal --- drivers/ata/sata_gemini.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/ata/sata_gemini.c b/drivers/ata/sata_gemini.c index 821ca31effe7..530ee26b3012 100644 --- a/drivers/ata/sata_gemini.c +++ b/drivers/ata/sata_gemini.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -27,8 +26,6 @@ * @muxmode: the current muxing mode * @ide_pins: if the device is using the plain IDE interface pins * @sata_bridge: if the device enables the SATA bridge - * @sata0_reset: SATA0 reset handler - * @sata1_reset: SATA1 reset handler * @sata0_pclk: SATA0 PCLK handler * @sata1_pclk: SATA1 PCLK handler */ @@ -38,8 +35,6 @@ struct sata_gemini { enum gemini_muxmode muxmode; bool ide_pins; bool sata_bridge; - struct reset_control *sata0_reset; - struct reset_control *sata1_reset; struct clk *sata0_pclk; struct clk *sata1_pclk; }; @@ -253,21 +248,6 @@ static int gemini_sata_bridge_init(struct sata_gemini *sg) return ret; } - sg->sata0_reset = devm_reset_control_get_exclusive(dev, "sata0"); - if (IS_ERR(sg->sata0_reset)) { - dev_err(dev, "no SATA0 reset controller\n"); - clk_disable_unprepare(sg->sata1_pclk); - clk_disable_unprepare(sg->sata0_pclk); - return PTR_ERR(sg->sata0_reset); - } - sg->sata1_reset = devm_reset_control_get_exclusive(dev, "sata1"); - if (IS_ERR(sg->sata1_reset)) { - dev_err(dev, "no SATA1 reset controller\n"); - clk_disable_unprepare(sg->sata1_pclk); - clk_disable_unprepare(sg->sata0_pclk); - return PTR_ERR(sg->sata1_reset); - } - sata_id = readl(sg->base + GEMINI_SATA_ID); sata_phy_id = readl(sg->base + GEMINI_SATA_PHY_ID); sg->sata_bridge = true; -- 2.51.0 From 8c87215dd3a2c814dcffc0bafe8c80c8f98f2574 Mon Sep 17 00:00:00 2001 From: Josua Mayer Date: Wed, 1 Jan 2025 13:13:33 +0100 Subject: [PATCH 09/16] ata: libahci_platform: support non-consecutive port numbers So far ahci_platform relied on number of child nodes in firmware to allocate arrays and expected port numbers to start from 0 without holes. This number of ports is then set in private structure for use when configuring phys and regulators. Some platforms may not use every port of an ahci controller. E.g. SolidRUN CN9130 Clearfog uses only port 1 but not port 0, leading to the following errors during boot: [ 1.719476] ahci f2540000.sata: invalid port number 1 [ 1.724562] ahci f2540000.sata: No port enabled Update all accessesors of ahci_host_priv phys and target_pwrs arrays to support holes. Access is gated by hpriv->mask_port_map which has a bit set for each enabled port. Update ahci_platform_get_resources to ignore holes in the port numbers and enable ports defined in firmware by their reg property only. When firmware does not define children it is assumed that there is exactly one port, using index 0. Signed-off-by: Josua Mayer Reviewed-by: Hans de Goede Signed-off-by: Damien Le Moal --- drivers/ata/ahci_brcm.c | 3 +++ drivers/ata/ahci_ceva.c | 6 +++++ drivers/ata/libahci_platform.c | 40 +++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index ef569eae4ce4..24c471b485ab 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -288,6 +288,9 @@ static unsigned int brcm_ahci_read_id(struct ata_device *dev, /* Re-initialize and calibrate the PHY */ for (i = 0; i < hpriv->nports; i++) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + rc = phy_init(hpriv->phys[i]); if (rc) goto disable_phys; diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index 1ec35778903d..f2e20ed11ec7 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -206,6 +206,9 @@ static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) goto disable_clks; for (i = 0; i < hpriv->nports; i++) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + rc = phy_init(hpriv->phys[i]); if (rc) goto disable_rsts; @@ -215,6 +218,9 @@ static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) ahci_platform_deassert_rsts(hpriv); for (i = 0; i < hpriv->nports; i++) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + rc = phy_power_on(hpriv->phys[i]); if (rc) { phy_exit(hpriv->phys[i]); diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 7a8064520a35..b68777841f7a 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -49,6 +49,9 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) int rc, i; for (i = 0; i < hpriv->nports; i++) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + rc = phy_init(hpriv->phys[i]); if (rc) goto disable_phys; @@ -70,6 +73,9 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) disable_phys: while (--i >= 0) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + phy_power_off(hpriv->phys[i]); phy_exit(hpriv->phys[i]); } @@ -88,6 +94,9 @@ void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) int i; for (i = 0; i < hpriv->nports; i++) { + if (!(hpriv->mask_port_map & (1 << i))) + continue; + phy_power_off(hpriv->phys[i]); phy_exit(hpriv->phys[i]); } @@ -432,6 +441,20 @@ static int ahci_platform_get_firmware(struct ahci_host_priv *hpriv, return 0; } +static u32 ahci_platform_find_max_port_id(struct device *dev) +{ + u32 max_port = 0; + + for_each_child_of_node_scoped(dev->of_node, child) { + u32 port; + + if (!of_property_read_u32(child, "reg", &port)) + max_port = max(max_port, port); + } + + return max_port; +} + /** * ahci_platform_get_resources - Get platform resources * @pdev: platform device to get resources for @@ -458,6 +481,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, struct device *dev = &pdev->dev; struct ahci_host_priv *hpriv; u32 mask_port_map = 0; + u32 max_port; if (!devres_open_group(dev, NULL, GFP_KERNEL)) return ERR_PTR(-ENOMEM); @@ -549,15 +573,17 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, goto err_out; } + /* find maximum port id for allocating structures */ + max_port = ahci_platform_find_max_port_id(dev); /* - * If no sub-node was found, we still need to set nports to - * one in order to be able to use the + * Set nports according to maximum port id. Clamp at + * AHCI_MAX_PORTS, warning message for invalid port id + * is generated later. + * When DT has no sub-nodes max_port is 0, nports is 1, + * in order to be able to use the * ahci_platform_[en|dis]able_[phys|regulators] functions. */ - if (child_nodes) - hpriv->nports = child_nodes; - else - hpriv->nports = 1; + hpriv->nports = min(AHCI_MAX_PORTS, max_port + 1); hpriv->phys = devm_kcalloc(dev, hpriv->nports, sizeof(*hpriv->phys), GFP_KERNEL); if (!hpriv->phys) { @@ -625,6 +651,8 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, * If no sub-node was found, keep this for device tree * compatibility */ + hpriv->mask_port_map |= BIT(0); + rc = ahci_platform_get_phy(hpriv, 0, dev, dev->of_node); if (rc) goto err_out; -- 2.51.0 From c9b5be909e6595547ed5d45aef39fd65948aa342 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 6 Jan 2025 14:14:47 +0900 Subject: [PATCH 10/16] ahci: Introduce ahci_ignore_port() helper libahci and AHCI drivers may ignore some ports if the port is invalid (its ID does not correspond to a valid physical port) or if the user explicitly requested the port to be ignored with the mask_port_map ahci module parameter. Such port that shall be ignored can be identified by checking that the bit corresponding to the port ID is not set in the mask_port_map field of struct ahci_host_priv. E.g. code such as: "if (!(hpriv->mask_port_map & (1 << portid)))". Replace all direct use of the mask_port_map field to detect such port with the new helper inline function ahci_ignore_port() to make the code more readable/easier to understand. The comment describing the mask_port_map field of struct ahci_host_priv is also updated to be more accurate. Signed-off-by: Damien Le Moal Reviewed-by: Niklas Cassel --- drivers/ata/ahci.h | 13 ++++++++++++- drivers/ata/ahci_brcm.c | 2 +- drivers/ata/ahci_ceva.c | 4 ++-- drivers/ata/libahci_platform.c | 6 +++--- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 8f40f75ba08c..aea30df50c58 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -328,7 +328,7 @@ struct ahci_port_priv { struct ahci_host_priv { /* Input fields */ unsigned int flags; /* AHCI_HFLAG_* */ - u32 mask_port_map; /* mask out particular bits */ + u32 mask_port_map; /* Mask of valid ports */ void __iomem * mmio; /* bus-independent mem map */ u32 cap; /* cap to use */ @@ -379,6 +379,17 @@ struct ahci_host_priv { int port); }; +/* + * Return true if a port should be ignored because it is excluded from + * the host port map. + */ +static inline bool ahci_ignore_port(struct ahci_host_priv *hpriv, + unsigned int portid) +{ + return portid >= hpriv->nports || + !(hpriv->mask_port_map & (1 << portid)); +} + extern int ahci_ignore_sss; extern const struct attribute_group *ahci_shost_groups[]; diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 24c471b485ab..29be74fedcf0 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -288,7 +288,7 @@ static unsigned int brcm_ahci_read_id(struct ata_device *dev, /* Re-initialize and calibrate the PHY */ for (i = 0; i < hpriv->nports; i++) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; rc = phy_init(hpriv->phys[i]); diff --git a/drivers/ata/ahci_ceva.c b/drivers/ata/ahci_ceva.c index f2e20ed11ec7..2d6a08c23d6a 100644 --- a/drivers/ata/ahci_ceva.c +++ b/drivers/ata/ahci_ceva.c @@ -206,7 +206,7 @@ static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) goto disable_clks; for (i = 0; i < hpriv->nports; i++) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; rc = phy_init(hpriv->phys[i]); @@ -218,7 +218,7 @@ static int ceva_ahci_platform_enable_resources(struct ahci_host_priv *hpriv) ahci_platform_deassert_rsts(hpriv); for (i = 0; i < hpriv->nports; i++) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; rc = phy_power_on(hpriv->phys[i]); diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index b68777841f7a..53b2c7719dc5 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -49,7 +49,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) int rc, i; for (i = 0; i < hpriv->nports; i++) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; rc = phy_init(hpriv->phys[i]); @@ -73,7 +73,7 @@ int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) disable_phys: while (--i >= 0) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; phy_power_off(hpriv->phys[i]); @@ -94,7 +94,7 @@ void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) int i; for (i = 0; i < hpriv->nports; i++) { - if (!(hpriv->mask_port_map & (1 << i))) + if (ahci_ignore_port(hpriv, i)) continue; phy_power_off(hpriv->phys[i]); -- 2.51.0 From f2809aa4f591d98e4c560a23d7eaca804a8afc54 Mon Sep 17 00:00:00 2001 From: Raphael Gallais-Pou Date: Thu, 9 Jan 2025 18:54:27 +0100 Subject: [PATCH 11/16] ahci: st: Switch from CONFIG_PM_SLEEP guards to pm_sleep_ptr() Letting the compiler remove these functions when the kernel is built without CONFIG_PM_SLEEP support is simpler and less error prone than the use of #ifdef based kernel configuration guards. Signed-off-by: Raphael Gallais-Pou Reviewed-by: Niklas Cassel Link: https://lore.kernel.org/r/20250109175427.64384-1-rgallaispou@gmail.com Signed-off-by: Niklas Cassel --- drivers/ata/ahci_st.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/ata/ahci_st.c b/drivers/ata/ahci_st.c index 6b9b4a1dfa15..4336c8a6e208 100644 --- a/drivers/ata/ahci_st.c +++ b/drivers/ata/ahci_st.c @@ -176,7 +176,6 @@ static int st_ahci_probe(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM_SLEEP static int st_ahci_suspend(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); @@ -221,9 +220,8 @@ static int st_ahci_resume(struct device *dev) return ahci_platform_resume_host(dev); } -#endif -static SIMPLE_DEV_PM_OPS(st_ahci_pm_ops, st_ahci_suspend, st_ahci_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(st_ahci_pm_ops, st_ahci_suspend, st_ahci_resume); static const struct of_device_id st_ahci_match[] = { { .compatible = "st,ahci", }, @@ -234,7 +232,7 @@ MODULE_DEVICE_TABLE(of, st_ahci_match); static struct platform_driver st_ahci_driver = { .driver = { .name = DRV_NAME, - .pm = &st_ahci_pm_ops, + .pm = pm_sleep_ptr(&st_ahci_pm_ops), .of_match_table = st_ahci_match, }, .probe = st_ahci_probe, -- 2.51.0 From cc77e2ce187d26cc66af3577bf896d7410eb25ab Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 Jan 2025 06:36:43 +0100 Subject: [PATCH 12/16] ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives Disabling link power management on Samsung SSD 870 QVO drives to make them work again after the switch of the default LPM policy to low. Testing so far has shown that regular Samsung SSD 870 (the non QVO variants) do not need it and work fine with the default LPM policy. Cc: stable@vger.kernel.org Fixes: 7627a0edef54 ("ata: ahci: Drop low power policy board type") Signed-off-by: Daniel Baumann Link: https://lore.kernel.org/linux-ide/ac64a484-022c-42a0-95bc-1520333b1536@debian.org/ Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c085dd81ebe7..63ec2f218431 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,6 +4143,10 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, + { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | + ATA_QUIRK_ZERO_AFTER_TRIM | + ATA_QUIRK_NO_NCQ_ON_ATI | + ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, -- 2.51.0 From 6e74e53b34b6dec5a50e1404e2680852ec6768d2 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 27 Jan 2025 16:43:04 +0100 Subject: [PATCH 13/16] ata: libata-sff: Ensure that we cannot write outside the allocated buffer reveliofuzzing reported that a SCSI_IOCTL_SEND_COMMAND ioctl with out_len set to 0xd42, SCSI command set to ATA_16 PASS-THROUGH, ATA command set to ATA_NOP, and protocol set to ATA_PROT_PIO, can cause ata_pio_sector() to write outside the allocated buffer, overwriting random memory. While a ATA device is supposed to abort a ATA_NOP command, there does seem to be a bug either in libata-sff or QEMU, where either this status is not set, or the status is cleared before read by ata_sff_hsm_move(). Anyway, that is most likely a separate bug. Looking at __atapi_pio_bytes(), it already has a safety check to ensure that __atapi_pio_bytes() cannot write outside the allocated buffer. Add a similar check to ata_pio_sector(), such that also ata_pio_sector() cannot write outside the allocated buffer. Cc: stable@vger.kernel.org Reported-by: reveliofuzzing Closes: https://lore.kernel.org/linux-ide/CA+-ZZ_jTgxh3bS7m+KX07_EWckSnW3N2adX3KV63y4g7M4CZ2A@mail.gmail.com/ Link: https://lore.kernel.org/r/20250127154303.15567-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-sff.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index 67f277e1c3bf..5a46c066abc3 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -601,7 +601,7 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; struct page *page; - unsigned int offset; + unsigned int offset, count; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -617,25 +617,27 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) page = nth_page(page, (offset >> PAGE_SHIFT)); offset %= PAGE_SIZE; - trace_ata_sff_pio_transfer_data(qc, offset, qc->sect_size); + /* don't overrun current sg */ + count = min(qc->cursg->length - qc->cursg_ofs, qc->sect_size); + + trace_ata_sff_pio_transfer_data(qc, offset, count); /* * Split the transfer when it splits a page boundary. Note that the * split still has to be dword aligned like all ATA data transfers. */ WARN_ON_ONCE(offset % 4); - if (offset + qc->sect_size > PAGE_SIZE) { + if (offset + count > PAGE_SIZE) { unsigned int split_len = PAGE_SIZE - offset; ata_pio_xfer(qc, page, offset, split_len); - ata_pio_xfer(qc, nth_page(page, 1), 0, - qc->sect_size - split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, count - split_len); } else { - ata_pio_xfer(qc, page, offset, qc->sect_size); + ata_pio_xfer(qc, page, offset, count); } - qc->curbytes += qc->sect_size; - qc->cursg_ofs += qc->sect_size; + qc->curbytes += count; + qc->cursg_ofs += count; if (qc->cursg_ofs == qc->cursg->length) { qc->cursg = sg_next(qc->cursg); -- 2.51.0 From 2c202e6c4f4dd19d2e8c1dfac9df05170aa3934f Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Sat, 8 Feb 2025 08:29:15 +0900 Subject: [PATCH 14/16] ata: libahci_platform: Do not set mask_port_map when not needed Commit 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") modified ahci_platform_get_resources() to allow identifying the ports of a controller that are defined as child nodes of the controller node in order to support non-consecutive port numbers (as defined by the platform device tree). However, this commit also erroneously sets bit 0 of hpriv->mask_port_map when the platform devices tree does not define port child nodes, to match the fact that the temporary default number of ports used in that case is 1 (which is also consistent with the fact that only index 0 of hpriv->phys[] is initialized with the call to ahci_platform_get_phy(). But doing so causes ahci_platform_init_host() to initialize and probe only the first port, even if this function determines that the controller has in fact multiple ports using the capability register of the controller (through a call to ahci_nr_ports()). This can be seen with the ahci_mvebu driver (Armada 385 SoC) with the second port declared as "dummy": ahci-mvebu f10a8000.sata: masking port_map 0x3 -> 0x1 ahci-mvebu f10a8000.sata: AHCI vers 0001.0000, 32 command slots, 6 Gbps, platform mode ahci-mvebu f10a8000.sata: 1/2 ports implemented (port mask 0x1) ahci-mvebu f10a8000.sata: flags: 64bit ncq sntf led only pmp fbs pio slum part sxs scsi host0: ahci-mvebu scsi host1: ahci-mvebu ata1: SATA max UDMA/133 mmio [mem 0xf10a8000-0xf10a9fff] port 0x100 irq 40 lpm-pol 0 ata2: DUMMY Fix this issue by removing setting bit 0 of hpriv->mask_port_map when the platform device tree does not define port child nodes. Reported-by: Klaus Kudielka Fixes: 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") Tested-by: Klaus Kudielka Signed-off-by: Damien Le Moal Acked-by: Josua Mayer Link: https://lore.kernel.org/r/20250207232915.1439174-1-dlemoal@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libahci_platform.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 53b2c7719dc5..91d44302eac9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -651,8 +651,6 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, * If no sub-node was found, keep this for device tree * compatibility */ - hpriv->mask_port_map |= BIT(0); - rc = ahci_platform_get_phy(hpriv, 0, dev, dev->of_node); if (rc) goto err_out; -- 2.51.0 From 130ff5c8b78e6fd05270a04985c50bce6a3de6c1 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Tue, 25 Feb 2025 15:16:12 +0100 Subject: [PATCH 15/16] ata: ahci: Make ahci_ignore_port() handle empty mask_port_map MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Commit 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") added a skip to ahci_platform_enable_phys() for ports that are not in mask_port_map. The code in ahci_platform_get_resources(), will currently set mask_port_map for each child "port" node it finds in the device tree. However, device trees that do not have any child "port" nodes will not have mask_port_map set, and for non-device tree platforms mask_port_map will only exist as a quirk for specific PCI device + vendor IDs, or as a kernel module parameter, but will not be set by default. Therefore, the common thing is that mask_port_map is only set if you do not want to use all ports (as defined by Offset 0Ch: PI – Ports Implemented register), but instead only want to use the ports in mask_port_map. If mask_port_map is not set, all ports are available. Thus, ahci_ignore_port() must be able to handle an empty mask_port_map. Fixes: 8c87215dd3a2 ("ata: libahci_platform: support non-consecutive port numbers") Fixes: 2c202e6c4f4d ("ata: libahci_platform: Do not set mask_port_map when not needed") Fixes: c9b5be909e65 ("ahci: Introduce ahci_ignore_port() helper") Reported-by: Marek Szyprowski Closes: https://lore.kernel.org/linux-ide/10b31dd0-d0bb-4f76-9305-2195c3e17670@samsung.com/ Tested-by: Marek Szyprowski Co-developed-by: Damien Le Moal Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20250225141612.942170-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/ahci.h | 8 ++++++-- drivers/ata/libahci.c | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index aea30df50c58..b2e0ef4efbdc 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -386,8 +386,12 @@ struct ahci_host_priv { static inline bool ahci_ignore_port(struct ahci_host_priv *hpriv, unsigned int portid) { - return portid >= hpriv->nports || - !(hpriv->mask_port_map & (1 << portid)); + if (portid >= hpriv->nports) + return true; + /* mask_port_map not set means that all ports are available */ + if (!hpriv->mask_port_map) + return false; + return !(hpriv->mask_port_map & (1 << portid)); } extern int ahci_ignore_sss; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fdfa7b266218..e7ace4b10f15 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -541,6 +541,7 @@ void ahci_save_initial_config(struct device *dev, struct ahci_host_priv *hpriv) hpriv->saved_port_map = port_map; } + /* mask_port_map not set means that all ports are available */ if (hpriv->mask_port_map) { dev_warn(dev, "masking port_map 0x%lx -> 0x%lx\n", port_map, -- 2.51.0 From a2f925a2f62254119cdaa360cfc9c0424bccd531 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Fri, 28 Feb 2025 13:26:04 +0100 Subject: [PATCH 16/16] Revert "ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives" This reverts commit cc77e2ce187d26cc66af3577bf896d7410eb25ab. It was reported that adding ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives breaks entering lower package states for certain systems. It turns out that Samsung SSD 870 QVO actually has working LPM when using a recent SSD firmware version. The author of commit cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") reported himself that only older SSD firmware versions have broken LPM: https://lore.kernel.org/stable/93c10d38-718c-459d-84a5-4d87680b4da7@debian.org/ Unfortunately, he did not specify which older firmware version he was using which had broken LPM. Let's revert this quirk, which has FW version field specified as NULL (which means that it applies for all Samsung SSD 870 QVO firmware versions) for now. Once the author reports which older firmware version(s) that are broken, we can create a more fine grained quirk, which populates the FW version field accordingly. Fixes: cc77e2ce187d ("ata: libata-core: Add ATA_QUIRK_NOLPM for Samsung SSD 870 QVO drives") Reported-by: Dieter Mummenschanz Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219747 Link: https://lore.kernel.org/r/20250228122603.91814-2-cassel@kernel.org Signed-off-by: Niklas Cassel --- drivers/ata/libata-core.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 63ec2f218431..c085dd81ebe7 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4143,10 +4143,6 @@ static const struct ata_dev_quirks_entry __ata_dev_quirks[] = { { "Samsung SSD 860*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, - { "Samsung SSD 870 QVO*", NULL, ATA_QUIRK_NO_NCQ_TRIM | - ATA_QUIRK_ZERO_AFTER_TRIM | - ATA_QUIRK_NO_NCQ_ON_ATI | - ATA_QUIRK_NOLPM }, { "Samsung SSD 870*", NULL, ATA_QUIRK_NO_NCQ_TRIM | ATA_QUIRK_ZERO_AFTER_TRIM | ATA_QUIRK_NO_NCQ_ON_ATI }, -- 2.51.0