From ea8614c08d7f725729910f464c01577d036f38f5 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 7 Aug 2024 13:43:39 +0200 Subject: [PATCH 01/16] tools/power turbostat: Fix column printing for PMT xtal_time counters If the very first printed column was for a PMT counter of type xtal_time we would misalign the column header, because we were always printing the delimiter. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 00674f7abdf5..80ac40638307 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2291,7 +2291,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2365,7 +2365,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } @@ -2496,7 +2496,7 @@ void print_header(char *delim) break; case PMT_TYPE_XTAL_TIME: - outp += sprintf(outp, "%s%s", delim, ppmt->name); + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), ppmt->name); break; } -- 2.50.1 From ae2cdf8d92ffc326104524a1f9da4cf75b6ea996 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 20 Aug 2024 18:47:59 +0200 Subject: [PATCH 02/16] tools/power turbostat: Allow using cpu device in perf counters on hybrid platforms Intel hybrid platforms expose different perf devices for P and E cores. Instead of one, "/sys/bus/event_source/devices/cpu" device, there are "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". This, however makes it more complicated for the user, because most of the counters are available on both and had to be handled manually. 
This patch allows users to use "virtual" cpu device that is seamlessly translated to cpu_core and cpu_atom perf devices, depending on the type of a CPU we are opening the counter for. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 25 ++++++ tools/power/x86/turbostat/turbostat.c | 105 ++++++++++++++++++++++++-- 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 067717bce1d4..56c7ff6efcda 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -33,6 +33,9 @@ name as necessary to disambiguate it from others is necessary. Note that option msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute is a perf device from /sys/bus/event_source/devices/ eg. cstate_core + On Intel hybrid platforms, instead of one "cpu" perf device there are two, "cpu_core" and "cpu_atom" devices for P and E cores respectively. + Turbostat, in this case, allow user to use "cpu" device and will automatically detect the type of a CPU and translate it to "cpu_core" and "cpu_atom" accordingly. + For a complete example see "ADD PERF COUNTER EXAMPLE #2 (using virtual "cpu" device)". is a perf event for given device from /sys/bus/event_source/devices//events/ eg. c1-residency perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency @@ -387,6 +390,28 @@ CPU pCPU%c1 CPU%c1 .fi +.SH ADD PERF COUNTER EXAMPLE #2 (using virtual cpu device) +Here we run on hybrid, Raptor Lake platform. +We limit turbostat to show output for just cpu0 (pcore) and cpu12 (ecore). +We add a counter showing number of L3 cache misses, using virtual "cpu" device, +labeling it with the column header, "VCMISS". +We add a counter showing number of L3 cache misses, using virtual "cpu_core" device, +labeling it with the column header, "PCMISS".
This will fail on ecore cpu12. +We add a counter showing number of L3 cache misses, using virtual "cpu_atom" device, +labeling it with the column header, "ECMISS". This will fail on pcore cpu0. +We display it only once, after the conclusion of 0.1 second sleep. +.nf +sudo ./turbostat --quiet --cpu 0,12 --show CPU --add perf/cpu/cache-misses,cpu,delta,raw,VCMISS --add perf/cpu_core/cache-misses,cpu,delta,raw,PCMISS --add perf/cpu_atom/cache-misses,cpu,delta,raw,ECMISS sleep .1 +turbostat: added_perf_counters_init_: perf/cpu_atom/cache-misses: failed to open counter on cpu0 +turbostat: added_perf_counters_init_: perf/cpu_core/cache-misses: failed to open counter on cpu12 +0.104630 sec +CPU ECMISS PCMISS VCMISS +- 0x0000000000000000 0x0000000000000000 0x0000000000000000 +0 0x0000000000000000 0x0000000000007951 0x0000000000007796 +12 0x000000000001137a 0x0000000000000000 0x0000000000011392 + +.fi + .SH ADD PMT COUNTER EXAMPLE Here we limit turbostat to showing just the CPU number 0. We add two counters, showing crystal clock count and the DC6 residency. 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 80ac40638307..462d821eaf41 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -31,6 +31,9 @@ ) // end copied section +#define CPUID_LEAF_MODEL_ID 0x1A +#define CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT 24 + #define X86_VENDOR_INTEL 0 #include INTEL_FAMILY_HEADER @@ -89,6 +92,9 @@ #define PERF_DEV_NAME_BYTES 32 #define PERF_EVT_NAME_BYTES 32 +#define INTEL_ECORE_TYPE 0x20 +#define INTEL_PCORE_TYPE 0x40 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; @@ -1848,6 +1854,7 @@ struct cpu_topology { int logical_node_id; /* 0-based count within the package */ int physical_core_id; int thread_id; + int type; cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; @@ -5653,6 +5660,32 @@ int init_thread_id(int cpu) return 0; } +int set_my_cpu_type(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int max_level; + + __cpuid(0, max_level, ebx, ecx, edx); + + if (max_level < CPUID_LEAF_MODEL_ID) + return 0; + + __cpuid(CPUID_LEAF_MODEL_ID, eax, ebx, ecx, edx); + + return (eax >> CPUID_LEAF_MODEL_ID_CORE_TYPE_SHIFT); +} + +int set_cpu_hybrid_type(int cpu) +{ + if (cpu_migrate(cpu)) + return -1; + + int type = set_my_cpu_type(); + + cpus[cpu].type = type; + return 0; +} + /* * snapshot_proc_interrupts() * @@ -8281,6 +8314,8 @@ void topology_probe(bool startup) for_all_proc_cpus(init_thread_id); + for_all_proc_cpus(set_cpu_hybrid_type); + /* * For online cpus * find max_core_id, max_package_id @@ -8545,6 +8580,35 @@ void check_perf_access(void) bic_enabled &= ~BIC_IPC; } +bool perf_has_hybrid_devices(void) +{ + /* + * 0: unknown + * 1: has separate perf device for p and e core + * -1: doesn't have separate perf device for p and e core + */ + 
static int cached; + + if (cached > 0) + return true; + + if (cached < 0) + return false; + + if (access("/sys/bus/event_source/devices/cpu_core", F_OK)) { + cached = -1; + return false; + } + + if (access("/sys/bus/event_source/devices/cpu_atom", F_OK)) { + cached = -1; + return false; + } + + cached = 1; + return true; +} + int added_perf_counters_init_(struct perf_counter_info *pinfo) { size_t num_domains = 0; @@ -8601,29 +8665,56 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (domain_visited[next_domain]) continue; - perf_type = read_perf_type(pinfo->device); + /* + * Intel hybrid platforms expose different perf devices for P and E cores. + * Instead of one, "/sys/bus/event_source/devices/cpu" device, there are + * "/sys/bus/event_source/devices/{cpu_core,cpu_atom}". + * + * This makes it more complicated to the user, because most of the counters + * are available on both and have to be handled manually, otherwise. + * + * Code below, allow user to use the old "cpu" name, which is translated accordingly. + */ + const char *perf_device = pinfo->device; + + if (strcmp(perf_device, "cpu") == 0 && perf_has_hybrid_devices()) { + switch (cpus[cpu].type) { + case INTEL_PCORE_TYPE: + perf_device = "cpu_core"; + break; + + case INTEL_ECORE_TYPE: + perf_device = "cpu_atom"; + break; + + default: /* Don't change, we will probably fail and report a problem soon. 
*/ + break; + } + } + + perf_type = read_perf_type(perf_device); if (perf_type == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "type"); + __func__, perf_device, pinfo->event, "type"); continue; } - perf_config = read_perf_config(pinfo->device, pinfo->event); + perf_config = read_perf_config(perf_device, pinfo->event); if (perf_config == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", - __func__, pinfo->device, pinfo->event, "config"); + __func__, perf_device, pinfo->event, "config"); continue; } /* Scale is not required, some counters just don't have it. */ - perf_scale = read_perf_scale(pinfo->device, pinfo->event); + perf_scale = read_perf_scale(perf_device, pinfo->event); if (perf_scale == 0.0) perf_scale = 1.0; fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); if (fd_perf == -1) { warnx("%s: perf/%s/%s: failed to open counter on cpu%d", - __func__, pinfo->device, pinfo->event, cpu); + __func__, perf_device, pinfo->event, cpu); continue; } @@ -8633,7 +8724,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) if (debug) fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", - pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + perf_device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; -- 2.50.1 From fed8511cc8996989178823052dc0200643e1389a Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Tue, 27 Aug 2024 13:07:51 +0800 Subject: [PATCH 03/16] tools/power turbostat: Fix trailing '\n' parsing parse_cpu_str() parses the string input either from command line or from /sys/fs/cgroup/cpuset.cpus.effective to get a list of CPUs that turbostat can run with. The cpu string returned by /sys/fs/cgroup/cpuset.cpus.effective contains a trailing '\n', but strtoul() fails to treat this as an error. That is, for the code below val = strtoul("\n", NULL, 10); val returns 0, and errno is also not set.
As a result, CPU0 is erroneously considered an allowed CPU and this causes failures when turbostat tries to run on CPU0. get_counters: Could not migrate to CPU 0 ... turbostat: re-initialized with num_cpus 8, allowed_cpus 5 get_counters: Could not migrate to CPU 0 Add a check to return immediately if '\n' or '\0' is detected. Fixes: 8c3dd2c9e542 ("tools/power/turbostat: Abstrct function for parsing cpu string") Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 462d821eaf41..b4386d54e65d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5389,6 +5389,9 @@ static int parse_cpu_str(char *cpu_str, cpu_set_t *cpu_set, int cpu_set_size) if (*next == '-') /* no negative cpu numbers */ return 1; + if (*next == '\0' || *next == '\n') + break; + start = strtoul(next, &next, 10); if (start >= CPU_SUBSET_MAXCPUS) -- 2.50.1 From c808624e2db2234991d948aa9bb9cd05bc3851a9 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 17 Sep 2024 22:33:26 +0200 Subject: [PATCH 04/16] tools/power turbostat: Honor --show CPU, even when num_cpus=1 Honor --show CPU and --show Core when "topo.num_cpus == 1". Previously turbostat assumed that on a 1-CPU system, these columns should never appear. Honoring these flags makes it easier for several programs that parse turbostat output.
Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b4386d54e65d..924f14e1ec35 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -8229,7 +8229,7 @@ void topology_probe(bool startup) set_max_cpu_num(); topo.num_cpus = 0; for_all_proc_cpus(count_cpus); - if (!summary_only && topo.num_cpus > 1) + if (!summary_only) BIC_PRESENT(BIC_CPU); if (debug > 1) @@ -8367,7 +8367,7 @@ void topology_probe(bool startup) topo.cores_per_node = max_core_id + 1; if (debug > 1) fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); - if (!summary_only && topo.cores_per_node > 1) + if (!summary_only) BIC_PRESENT(BIC_Core); topo.num_die = topo.max_die_id + 1; -- 2.50.1 From f5e2cf228f185fe3ede98caf2b28e8a1c2103262 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:37 +0800 Subject: [PATCH 05/16] tools/power turbostat: Remove PC7/PC9 support on MTL Similar to ADL/RPL, MTL supports CC1/CC6/CC7/PC2/PC3/PC6/PC8/PC10. Remove PC7/PC9 support on MTL.
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 924f14e1ec35..f6a91f0b658b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1009,8 +1009,8 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_RAPTORLAKE, &adl_features }, { INTEL_RAPTORLAKE_P, &adl_features }, { INTEL_RAPTORLAKE_S, &adl_features }, - { INTEL_METEORLAKE, &cnl_features }, - { INTEL_METEORLAKE_L, &cnl_features }, + { INTEL_METEORLAKE, &adl_features }, + { INTEL_METEORLAKE_L, &adl_features }, { INTEL_ARROWLAKE_H, &arl_features }, { INTEL_ARROWLAKE_U, &arl_features }, { INTEL_ARROWLAKE, &arl_features }, -- 2.50.1 From b082e07aec468c4564ceff83a6739d2407d1979d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:38 +0800 Subject: [PATCH 06/16] tools/power turbostat: Add back PC8 support on Arrowlake Similar to ADL/RPL/MTL, ARL supports CC1/CC6/CC7/PC2/PC3/PC6/PC8/PC10. Add back PC8 support on Arrowlake. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f6a91f0b658b..0ba2564f512a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1011,9 +1011,9 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_RAPTORLAKE_S, &adl_features }, { INTEL_METEORLAKE, &adl_features }, { INTEL_METEORLAKE_L, &adl_features }, - { INTEL_ARROWLAKE_H, &arl_features }, - { INTEL_ARROWLAKE_U, &arl_features }, - { INTEL_ARROWLAKE, &arl_features }, + { INTEL_ARROWLAKE_H, &adl_features }, + { INTEL_ARROWLAKE_U, &adl_features }, + { INTEL_ARROWLAKE, &adl_features }, { INTEL_LUNARLAKE_M, &arl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, -- 2.50.1 From 3ae5f34384176a4a8742dd11ab0e1e062dcc6ce2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:39 +0800 Subject: [PATCH 07/16] tools/power turbostat: Rename arl_features to lnl_features As ARL shares the same features with ADL/RPL/MTL, now 'arl_features' is used by Lunarlake platform only. Rename 'arl_features' to 'lnl_features'. No functional change. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0ba2564f512a..dd2a90b1d12d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -752,7 +752,7 @@ static const struct platform_features adl_features = { .enable_tsc_tweak = 1, }; -static const struct platform_features arl_features = { +static const struct platform_features lnl_features = { .has_msr_misc_feature_control = 1, .has_msr_misc_pwr_mgmt = 1, .has_nhm_msrs = 1, @@ -1014,7 +1014,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_ARROWLAKE_H, &adl_features }, { INTEL_ARROWLAKE_U, &adl_features }, { INTEL_ARROWLAKE, &adl_features }, - { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_LUNARLAKE_M, &lnl_features }, { INTEL_ATOM_SILVERMONT, &slv_features }, { INTEL_ATOM_SILVERMONT_D, &slvd_features }, { INTEL_ATOM_AIRMONT, &amt_features }, -- 2.50.1 From 26c57a152bb4ab21757cb0cf439c4e8e0b5f61a9 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:40 +0800 Subject: [PATCH 08/16] tools/power turbostat: Remove PC3 support on Lunarlake Lunarlake supports CC1/CC6/CC7/PC2/PC6/PC10. Remove PC3 support on Lunarlake. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index dd2a90b1d12d..b0ed8e0f65c2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -758,7 +758,7 @@ static const struct platform_features lnl_features = { .has_nhm_msrs = 1, .has_config_tdp = 1, .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC10, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, .cst_limit = CST_LIMIT_HSW, .has_irtl_msrs = 1, .has_msr_core_c1_res = 1, -- 2.50.1 From d39d586ee44407ec89b9527a9c1f27a91d6b05d1 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:41 +0800 Subject: [PATCH 09/16] tools/power turbostat: Add initial support for GraniteRapids-D Add initial support for GraniteRapids-D. It shares the same features with SapphireRapids. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b0ed8e0f65c2..90f3119dbd14 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1003,6 +1003,7 @@ static const struct platform_data turbostat_pdata[] = { { INTEL_SAPPHIRERAPIDS_X, &spr_features }, { INTEL_EMERALDRAPIDS_X, &spr_features }, { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_D, &spr_features }, { INTEL_LAKEFIELD, &cnl_features }, { INTEL_ALDERLAKE, &adl_features }, { INTEL_ALDERLAKE_L, &adl_features }, -- 2.50.1 From 1958f4e16864f78ab121de08ba4d7a984ed46891 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:42 +0800 Subject: [PATCH 10/16] tools/power turbostat: Enhance platform divergence description In various generations, platforms often share a majority of features, diverging only in a few specific aspects. The current approach of using hardcoded values in 'platform_features' structure fails to effectively represent these divergences. To improve the description of platform divergence: 1. Each newly introduced 'platform_features' structure must have a base, typically derived from the previous generation. 2. Platform feature values should be inherited from the base structure rather than being hardcoded. This approach ensures a more accurate and maintainable representation of platform-specific features across different generations. Converts `adl_features` and `lnl_features` to follow this new scheme. No functional change. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 58 ++++++++++++++------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 90f3119dbd14..ae841baeca85 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -735,38 +735,40 @@ static const struct platform_features cnl_features = { .enable_tsc_tweak = 1, }; +/* Copied from cnl_features, with PC7/PC9 removed */ static const struct platform_features adl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - .has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, + .has_msr_misc_feature_control = cnl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = cnl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = cnl_features.has_nhm_msrs, + .has_config_tdp = cnl_features.has_config_tdp, + .bclk_freq = cnl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC3 | PC6 | PC8 | PC10, + .cst_limit = cnl_features.cst_limit, + .has_irtl_msrs = cnl_features.has_irtl_msrs, + .has_msr_core_c1_res = cnl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = cnl_features.has_ext_cst_msrs, + .trl_msrs = cnl_features.trl_msrs, + .tcc_offset_bits = cnl_features.tcc_offset_bits, + .rapl_msrs = cnl_features.rapl_msrs, + .enable_tsc_tweak = cnl_features.enable_tsc_tweak, }; +/* Copied from adl_features, with PC3/PC8 removed */ static const struct platform_features lnl_features = { - .has_msr_misc_feature_control = 1, - .has_msr_misc_pwr_mgmt = 1, - 
.has_nhm_msrs = 1, - .has_config_tdp = 1, - .bclk_freq = BCLK_100MHZ, - .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, - .cst_limit = CST_LIMIT_HSW, - .has_irtl_msrs = 1, - .has_msr_core_c1_res = 1, - .has_ext_cst_msrs = 1, - .trl_msrs = TRL_BASE, - .tcc_offset_bits = 6, - .rapl_msrs = RAPL_PKG_ALL | RAPL_CORE_ALL | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_GFX, - .enable_tsc_tweak = 1, + .has_msr_misc_feature_control = adl_features.has_msr_misc_feature_control, + .has_msr_misc_pwr_mgmt = adl_features.has_msr_misc_pwr_mgmt, + .has_nhm_msrs = adl_features.has_nhm_msrs, + .has_config_tdp = adl_features.has_config_tdp, + .bclk_freq = adl_features.bclk_freq, + .supported_cstates = CC1 | CC6 | CC7 | PC2 | PC6 | PC10, + .cst_limit = adl_features.cst_limit, + .has_irtl_msrs = adl_features.has_irtl_msrs, + .has_msr_core_c1_res = adl_features.has_msr_core_c1_res, + .has_ext_cst_msrs = adl_features.has_ext_cst_msrs, + .trl_msrs = adl_features.trl_msrs, + .tcc_offset_bits = adl_features.tcc_offset_bits, + .rapl_msrs = adl_features.rapl_msrs, + .enable_tsc_tweak = adl_features.enable_tsc_tweak, }; static const struct platform_features skx_features = { -- 2.50.1 From ba99a4fc8c24dc7d35f18edb6e3b0a65345fbfa3 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:43 +0800 Subject: [PATCH 11/16] tools/power turbostat: Remove unnecessary fflush() call The graphics sysfs knobs are read-only, making the use of fflush() before reading them redundant. Remove the unnecessary fflush() call. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ae841baeca85..c0596ccf92cd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5780,12 +5780,11 @@ int snapshot_graphics(int idx) case GFX_ACTMHz: case SAM_MHz: case SAM_ACTMHz: - if (gfx_info[idx].fp == NULL) { + if (gfx_info[idx].fp == NULL) gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); - } else { + else rewind(gfx_info[idx].fp); - fflush(gfx_info[idx].fp); - } + retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); if (retval != 1) err(1, "MHz"); -- 2.50.1 From d071004e623b7433573019d67cba79e345d83006 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:44 +0800 Subject: [PATCH 12/16] tools/power turbostat: Consolidate graphics sysfs access Currently, there is an inconsistency in how graphics sysfs knobs are accessed: graphics residency sysfs knobs are opened and closed for each read, while graphics frequency sysfs knobs are opened once and remain open until turbostat exits. This inconsistency is confusing and adds unnecessary code complexity. Consolidate the access method by opening the sysfs files once and reusing the file pointers for subsequent accesses. This approach simplifies the code and ensures a consistent method for accessing graphics sysfs knobs. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c0596ccf92cd..e5b100b8db24 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5764,27 +5764,24 @@ int snapshot_proc_interrupts(void) */ int snapshot_graphics(int idx) { - FILE *fp; int retval; + if (gfx_info[idx].fp == NULL) + gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); + else + rewind(gfx_info[idx].fp); + switch (idx) { case GFX_rc6: case SAM_mc6: - fp = fopen_or_die(gfx_info[idx].path, "r"); - retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull); + retval = fscanf(gfx_info[idx].fp, "%lld", &gfx_info[idx].val_ull); if (retval != 1) err(1, "rc6"); - fclose(fp); return 0; case GFX_MHz: case GFX_ACTMHz: case SAM_MHz: case SAM_ACTMHz: - if (gfx_info[idx].fp == NULL) - gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); - else - rewind(gfx_info[idx].fp); - retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val); if (retval != 1) err(1, "MHz"); -- 2.50.1 From c7538f33853b11d0ff2a81efb78bde125d1fc49f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:45 +0800 Subject: [PATCH 13/16] tools/power turbostat: Cache graphics sysfs file descriptors during probe Snapshots of the graphics sysfs knobs are taken based on file descriptors. To optimize this process, open the files and cache the file descriptors during the graphics probe phase. As a result, the previously cached pathnames become redundant and are removed. This change aims to streamline the code without altering its functionality. No functional change intended. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 82 +++++++++++---------------- 1 file changed, 32 insertions(+), 50 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e5b100b8db24..28513172ffce 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -376,7 +376,6 @@ enum gfx_sysfs_idx { }; struct gfx_sysfs_info { - const char *path; FILE *fp; unsigned int val; unsigned long long val_ull; @@ -5766,10 +5765,7 @@ int snapshot_graphics(int idx) { int retval; - if (gfx_info[idx].fp == NULL) - gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r"); - else - rewind(gfx_info[idx].fp); + rewind(gfx_info[idx].fp); switch (idx) { case GFX_rc6: @@ -6474,6 +6470,12 @@ static void probe_intel_uncore_frequency(void) probe_intel_uncore_frequency_legacy(); } +static void set_graphics_fp(char *path, int idx) +{ + if (!access(path, R_OK)) + gfx_info[idx].fp = fopen_or_die(path, "r"); +} + static void probe_graphics(void) { /* Xe graphics sysfs knobs */ @@ -6481,7 +6483,6 @@ static void probe_graphics(void) FILE *fp; char buf[8]; bool gt0_is_gt; - int idx; fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r"); if (!fp) @@ -6500,28 +6501,17 @@ static void probe_graphics(void) else goto next; - idx = gt0_is_gt ? GFX_rc6 : SAM_mc6; - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", gt0_is_gt ? GFX_rc6 : SAM_mc6); - idx = gt0_is_gt ? GFX_MHz : SAM_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", gt0_is_gt ? GFX_MHz : SAM_MHz); - idx = gt0_is_gt ? 
GFX_ACTMHz : SAM_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz); - idx = gt0_is_gt ? SAM_mc6 : GFX_rc6; - if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", gt0_is_gt ? SAM_mc6 : GFX_rc6); - idx = gt0_is_gt ? SAM_MHz : GFX_MHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", gt0_is_gt ? SAM_MHz : GFX_MHz); - idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz; - if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK)) - gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq"; + set_graphics_fp("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", gt0_is_gt ? 
SAM_ACTMHz : GFX_ACTMHz); goto end; } @@ -6529,52 +6519,44 @@ static void probe_graphics(void) next: /* New i915 graphics sysfs knobs */ if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", GFX_rc6); - if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", GFX_MHz); - if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", GFX_ACTMHz); - if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK)) - gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", SAM_mc6); - if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK)) - gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", SAM_MHz); - if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK)) - gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", SAM_ACTMHz); goto end; } /* Fall back to traditional i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) - gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms"; + set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); - if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) - gfx_info[GFX_MHz].path = 
"/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_cur_freq_mhz", GFX_MHz); + if (!gfx_info[GFX_MHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", GFX_MHz); - if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz"; - else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) - gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz"; + set_graphics_fp("/sys/class/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); + if (!gfx_info[GFX_ACTMHz].fp) + set_graphics_fp("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", GFX_ACTMHz); end: - if (gfx_info[GFX_rc6].path) + if (gfx_info[GFX_rc6].fp) BIC_PRESENT(BIC_GFX_rc6); - if (gfx_info[GFX_MHz].path) + if (gfx_info[GFX_MHz].fp) BIC_PRESENT(BIC_GFXMHz); - if (gfx_info[GFX_ACTMHz].path) + if (gfx_info[GFX_ACTMHz].fp) BIC_PRESENT(BIC_GFXACTMHz); - if (gfx_info[SAM_mc6].path) + if (gfx_info[SAM_mc6].fp) BIC_PRESENT(BIC_SAM_mc6); - if (gfx_info[SAM_MHz].path) + if (gfx_info[SAM_MHz].fp) BIC_PRESENT(BIC_SAMMHz); - if (gfx_info[SAM_ACTMHz].path) + if (gfx_info[SAM_ACTMHz].fp) BIC_PRESENT(BIC_SAMACTMHz); } -- 2.50.1 From 03109e2f0d18dcb84218bd91c4fbf864193ca934 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 14 Nov 2024 15:59:46 +0800 Subject: [PATCH 14/16] tools/power turbostat: Add support for /sys/class/drm/card1 On some machines, the graphics device is enumerated as /sys/class/drm/card1 instead of /sys/class/drm/card0. The current implementation does not handle this scenario, resulting in the loss of graphics C6 residency and frequency information. Add support for /sys/class/drm/card1, ensuring that turbostat can retrieve and display the graphics columns for these platforms. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 38 ++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 28513172ffce..b250676c174e 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -6476,8 +6476,14 @@ static void set_graphics_fp(char *path, int idx) gfx_info[idx].fp = fopen_or_die(path, "r"); } +/* Enlarge this if there are /sys/class/drm/card2 ... */ +#define GFX_MAX_CARDS 2 + static void probe_graphics(void) { + char path[PATH_MAX]; + int i; + /* Xe graphics sysfs knobs */ if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) { FILE *fp; @@ -6518,22 +6524,36 @@ static void probe_graphics(void) next: /* New i915 graphics sysfs knobs */ - if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) { - set_graphics_fp("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", GFX_rc6); + for (i = 0; i < GFX_MAX_CARDS; i++) { + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + if (!access(path, R_OK)) + break; + } - set_graphics_fp("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", GFX_MHz); + if (i == GFX_MAX_CARDS) + goto legacy_i915; - set_graphics_fp("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", GFX_ACTMHz); + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rc6_residency_ms", i); + set_graphics_fp(path, GFX_rc6); - set_graphics_fp("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", SAM_mc6); + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_cur_freq_mhz", i); + set_graphics_fp(path, GFX_MHz); - set_graphics_fp("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", SAM_MHz); + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt0/rps_act_freq_mhz", i); + set_graphics_fp(path, GFX_ACTMHz); - set_graphics_fp("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", SAM_ACTMHz); + snprintf(path, 
PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rc6_residency_ms", i); + set_graphics_fp(path, SAM_mc6); - goto end; - } + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_cur_freq_mhz", i); + set_graphics_fp(path, SAM_MHz); + + snprintf(path, PATH_MAX, "/sys/class/drm/card%d/gt/gt1/rps_act_freq_mhz", i); + set_graphics_fp(path, SAM_ACTMHz); + + goto end; +legacy_i915: /* Fall back to traditional i915 graphics sysfs knobs */ set_graphics_fp("/sys/class/drm/card0/power/rc6_residency_ms", GFX_rc6); -- 2.50.1 From bcfab87108b33f20d847fd71a2a93114dd2ce83e Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Thu, 24 Oct 2024 15:17:45 +0200 Subject: [PATCH 15/16] tools/power turbostat: Force --no-perf in --dump mode Force --no-perf early to prevent perf from being used as a counter source. The user asks for raw values, but perf returns them relative to the opening of the file descriptor. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b250676c174e..1fed799a5537 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9897,6 +9897,12 @@ void cmdline(int argc, char **argv) break; case 'D': dump_only++; + /* + * Force the no_perf early to prevent using it as a source. + * User asks for raw values, but perf returns them relative + * to the opening of the file descriptor. + */ + no_perf = 1; break; case 'e': /* --enable specified counter */ -- 2.50.1 From 1da0daf746342dfdc114e4dc8fbf3ece28666d4f Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 13 Nov 2024 15:48:22 +0100 Subject: [PATCH 16/16] tools/power turbostat: Fix child's argument forwarding Add '+' to the optstring used when scanning early for --no-msr and --no-perf. This causes option processing to stop as soon as a non-option argument is encountered, effectively skipping the child's arguments.
Fixes: 3e4048466c39 ("tools/power turbostat: Add --no-msr option") Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1fed799a5537..9025c2945737 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9873,7 +9873,7 @@ void cmdline(int argc, char **argv) * Parse some options early, because they may make other options invalid, * like adding the MSR counter with --add and at the same time using --no-msr. */ - while ((opt = getopt_long_only(argc, argv, "MPn:", long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "+MPn:", long_options, &option_index)) != -1) { switch (opt) { case 'M': no_msr = 1; -- 2.50.1