From: Ian Rogers Date: Sat, 19 Jul 2025 03:05:07 +0000 (-0700) Subject: perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=175c852325a1f566426e2470e5d5d67efc7621dd;p=users%2Fjedix%2Flinux-maple.git perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask For hybrid metrics it is useful to know the number of p-core or e-core CPUs. If a cpumask is specified for the num_cpus or num_cpus_online tool events, compute the value relative to the given mask rather than for the full system. ``` $ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/, tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online, cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true Performance counter stats for 'true': 28 tool/num_cpus/ 16 tool/num_cpus,cpu=cpu_core/ 12 tool/num_cpus,cpu=cpu_atom/ 28 tool/num_cpus_online/ 16 tool/num_cpus_online,cpu=cpu_core/ 12 tool/num_cpus_online,cpu=cpu_atom/ 0.000767205 seconds time elapsed 0.000938000 seconds user 0.000000000 seconds sys ``` Reviewed-by: Thomas Falcon Signed-off-by: Ian Rogers Tested-by: James Clark Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com Signed-off-by: Namhyung Kim --- diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index ca70a14c7cdfd..7fda0ff89c168 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -401,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx if (ev != TOOL_PMU__EVENT_NONE) { u64 count; - if (tool_pmu__read_event(ev, &count)) + if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count)) result = count; else pr_err("Failure to read '%s'", literal); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 4630b8cc8e529..7aa4f315b0ac2 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -332,7 +332,7 @@ static bool has_pmem(void) return has_pmem; } -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result) { const struct cpu_topology *topology; @@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) return true; case TOOL_PMU__EVENT_NUM_CPUS: - *result = cpu__max_present_cpu().cpu; + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = cpu__max_present_cpu().cpu; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + *result = perf_cpu_map__nr(evsel->core.cpus); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus. If not present fall back to max. + */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) + *result = perf_cpu_map__nr(evsel->core.own_cpus); + else + *result = cpu__max_present_cpu().cpu; + } return true; case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { struct perf_cpu_map *online = cpu_map__online(); - if (online) { + if (!online) + return false; + + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ *result = perf_cpu_map__nr(online); - perf_cpu_map__put(online); - return true; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus, if not present then just the online cpu + * mask. + */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) { + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.own_cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + *result = perf_cpu_map__nr(online); + } } - return false; + perf_cpu_map__put(online); + return true; } case TOOL_PMU__EVENT_NUM_DIES: topology = online_topology(); @@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); val = 0; if (cpu_map_idx == 0 && thread == 0) { - if (!tool_pmu__read_event(ev, &val)) { + if (!tool_pmu__read_event(ev, evsel, &val)) { count->lost++; val = 0; } diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h index c6ad1dd90a56d..d642e7d73910e 100644 --- a/tools/perf/util/tool_pmu.h +++ b/tools/perf/util/tool_pmu.h @@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str); bool tool_pmu__skip_event(const char *name); int tool_pmu__num_skip_events(void); -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result); u64 tool_pmu__cpu_slots_per_cycle(void);