From: Ian Rogers Date: Thu, 3 Apr 2025 19:43:37 +0000 (-0700) Subject: perf parse-events: Add "cpu" term to set the CPU an event is recorded on X-Git-Url: https://www.infradead.org/git/?a=commitdiff_plain;h=255f5b6d060be5a43de6edf42149b1391f69e3fc;p=users%2Fjedix%2Flinux-maple.git perf parse-events: Add "cpu" term to set the CPU an event is recorded on The -C option allows the CPUs for a list of events to be specified but its not possible to set the CPU for a single event. Add a term to allow this. The term isn't a general CPU list due to ',' already being a special character in event parsing instead multiple cpu= terms may be provided and they will be merged/unioned together. An example of mixing different types of events counted on different CPUs: ``` $ perf stat -A -C 0,4-5,8 -e "instructions/cpu=0/,l1d-misses/cpu=4,cpu=5/,inst_retired.any/cpu=8/,cycles" -a sleep 0.1 Performance counter stats for 'system wide': CPU0 6,979,225 instructions/cpu=0/ # 0.89 insn per cycle CPU4 75,138 cpu/l1d-misses/ CPU5 1,418,939 cpu/l1d-misses/ CPU8 797,553 cpu/inst_retired.any,cpu=8/ CPU0 7,845,302 cycles CPU4 6,546,859 cycles CPU5 185,915,438 cycles CPU8 2,065,668 cycles 0.112449242 seconds time elapsed ``` Committer testing: root@number:~# grep -m1 "model name" /proc/cpuinfo model name : AMD Ryzen 9 9950X3D 16-Core Processor root@number:~# perf stat -A -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0.1 Performance counter stats for 'system wide': CPU0 2,398,351 instructions/cpu=0/ # 0.44 insn per cycle CPU0 2,398,152 instructions # 0.44 insn per cycle CPU1 1,265,634 instructions # 0.49 insn per cycle CPU2 606,087 instructions # 0.50 insn per cycle CPU3 4,025,752 instructions # 0.52 insn per cycle CPU4 4,236,810 instructions # 0.53 insn per cycle CPU5 3,984,832 instructions # 0.66 insn per cycle CPU6 434,132 instructions # 0.44 insn per cycle CPU7 65,752 instructions # 0.41 insn per cycle CPU8 459,083 instructions # 0.48 insn per cycle CPU9 6,464,161 instructions # 1.31 insn per cycle root@number:~# perf stat -e "instructions/cpu=0/,instructions,l1d-misses/cpu=4,cpu=5/,cycles" -a sleep 0. Performance counter stats for 'system wide': 144,822 instructions/cpu=0/ # 0.03 insn per cycle 4,666,114 instructions # 0.93 insn per cycle 2,583 l1d-misses 4,993,633 cycles 0.000868512 seconds time elapsed root@number:~# Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Tested-by: Kan Liang Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Dominique Martinet Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Weilin Wang Cc: Yicong Yang Link: https://lore.kernel.org/r/20250403194337.40202-5-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 8914f12d2b85..ce0735021473 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -289,6 +289,15 @@ Sums up the event counts for all hardware threads in a core, e.g.: perf stat -e cpu/event=0,umask=0x3,percore=1/ +cpu: + +Specifies the CPU to open the event upon. The value may be repeated to +specify opening the event on multiple CPUs: + + + perf stat -e instructions/cpu=0,cpu=2/,cycles/cpu=1,cpu=2/ -a sleep 1 + perf stat -e data_read/cpu=0/,data_write/cpu=1/ -a sleep 1 + EVENT GROUPS ------------ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index af52a1516d0b..94a1e9cf73d6 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -48,6 +48,7 @@ struct evsel_config_term { u32 aux_sample_size; u64 cfg_chg; char *str; + int cpu; } val; bool weak; }; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index cf4f9b59a185..177d045577c4 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,6 +7,7 @@ #include #include #include +#include "cpumap.h" #include "term.h" #include "env.h" #include "evlist.h" @@ -180,6 +181,26 @@ static char *get_config_name(const struct parse_events_terms *head_terms) return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms) +{ + struct parse_events_term *term; + struct perf_cpu_map *cpus = NULL; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, &head_terms->terms, list) { + if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { + struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); + + perf_cpu_map__merge(&cpus, cpu); + perf_cpu_map__put(cpu); + } + } + + return cpus; +} + /** * fix_raw - For each raw term see if there is an event (aka alias) in pmu that * matches the raw's string value. If the string value matches an @@ -443,11 +464,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, bool found_supported = false; const char *config_name = get_config_name(parsed_terms); const char *metric_id = get_config_metric_id(parsed_terms); + struct perf_cpu_map *cpus = get_config_cpu(parsed_terms); + int ret = 0; while ((pmu = perf_pmus__scan(pmu)) != NULL) { LIST_HEAD(config_terms); struct perf_event_attr attr; - int ret; if (parse_events__filter_pmu(parse_state, pmu)) continue; @@ -462,7 +484,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, perf_pmu__auto_merge_stats(pmu), /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) - return ret; + goto out_err; continue; } @@ -482,21 +504,27 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (parsed_terms) { if (config_attr(&attr, parsed_terms, parse_state->error, - config_term_common)) - return -EINVAL; - - if (get_config_terms(parsed_terms, &config_terms)) - return -ENOMEM; + config_term_common)) { + ret = -EINVAL; + goto out_err; + } + if (get_config_terms(parsed_terms, &config_terms)) { + ret = -ENOMEM; + goto out_err; + } } if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) - return -ENOMEM; + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) + ret = -ENOMEM; free_config_terms(&config_terms); + if (ret) + goto out_err; } +out_err: + perf_cpu_map__put(cpus); return found_supported ? 0 : -EINVAL; } @@ -815,6 +843,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", }; if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) return "unknown term"; @@ -844,6 +873,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_CPU: return true; case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -991,6 +1021,15 @@ do { \ return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CPU: + CHECK_TYPE_VAL(NUM); + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(err, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: @@ -1118,6 +1157,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: if (err) { parse_events_error__handle(err, term->err_term, @@ -1252,6 +1292,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1306,6 +1347,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1350,6 +1392,7 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; + struct perf_cpu_map *cpus; int ret; memset(&attr, 0, sizeof(attr)); @@ -1371,10 +1414,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); + cpus = get_config_cpu(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX - ) == NULL ? -ENOMEM : 0; + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; + perf_cpu_map__put(cpus); free_config_terms(&config_terms); return ret; } @@ -1434,6 +1478,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; bool alias_rewrote_terms = false; + struct perf_cpu_map *term_cpu = NULL; if (verbose > 1) { struct strbuf sb; @@ -1528,11 +1573,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } + term_cpu = get_config_cpu(&parsed_terms); evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL, - alternate_hw_config); + &config_terms, auto_merge_stats, term_cpu, alternate_hw_config); + perf_cpu_map__put(term_cpu); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e176a34ab088..ab242f671031 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -80,7 +80,8 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_RAW, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, PARSE_EVENTS__TERM_TYPE_HARDWARE, -#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1) + PARSE_EVENTS__TERM_TYPE_CPU, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1) }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7ed86e3e34e3..4af7b9c1f44d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } +cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index d08972aa461c..798810704f5b 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1470,7 +1470,7 @@ static int pmu_config_term(const struct perf_pmu *pmu, break; case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ return -EINVAL; - case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU: /* Skip non-config terms. */ break; default: @@ -1852,6 +1852,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "aux-output", "aux-action=(pause|resume|start-paused)", "aux-sample-size=number", + "cpu=number", }; struct perf_pmu_format *format; int ret;