From 61b6b31c2f51f8757ecc65df8a4f5eeff029a804 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Thu, 13 Feb 2025 10:08:17 +0100 Subject: [PATCH] perf report: Add parallelism filter Add parallelism filter that can be used to look at specific parallelism levels only. The format is the same as cpu lists. For example: Only single-threaded samples: --parallelism=1 Low parallelism only: --parallelism=1-4 High parallelism only: --parallelism=64-128 Signed-off-by: Dmitry Vyukov Reviewed-by: Andi Kleen Link: https://lore.kernel.org/r/e61348985ff0a6a14b07c39e880edbd60a8f8635.1739437531.git.dvyukov@google.com Signed-off-by: Namhyung Kim --- tools/perf/builtin-report.c | 5 ++++- tools/perf/util/event.c | 2 ++ tools/perf/util/hist.c | 31 +++++++++++++++++++++++++++++++ tools/perf/util/hist.h | 6 +++++- tools/perf/util/sort.c | 11 +++++++++++ tools/perf/util/symbol.c | 34 ++++++++++++++++++++++++++++++++++ tools/perf/util/symbol_conf.h | 4 ++++ 7 files changed, 91 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 14d49f062588..2a19abdc869a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1390,6 +1390,8 @@ int cmd_report(int argc, const char **argv) symbol__config_symfs), OPT_STRING('C', "cpu", &report.cpu_list, "cpu", "list of cpus to profile"), + OPT_STRING(0, "parallelism", &symbol_conf.parallelism_list_str, "parallelism", + "only consider these parallelism levels (cpu set format)"), OPT_BOOLEAN('I', "show-info", &report.show_full_info, "Display extended information about perf.data file"), OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, @@ -1721,7 +1723,8 @@ repeat: } if (report.disable_order || !perf_session__has_switch_events(session)) { - if ((sort_order && strstr(sort_order, "parallelism")) || + if (symbol_conf.parallelism_list_str || + (sort_order && strstr(sort_order, "parallelism")) || (field_order && strstr(field_order, "parallelism"))) { if (report.disable_order) 
ui__error("Use of parallelism is incompatible with --disable-order.\n"); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 2f10e3115757..6ceed46acd5a 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -769,6 +769,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al, /* Account for possible out-of-order switch events. */ al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); + if (test_bit(al->parallelism, symbol_conf.parallelism_filter)) + al->filtered |= (1 << HIST_FILTER__PARALLELISM); if (al->map) { if (symbol_conf.dso_list && diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6b8f8da8d3b6..446342246f5e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists, struct hist_entry *he); static bool hists__filter_entry_by_socket(struct hists *hists, struct hist_entry *he); +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he); u16 hists__col_len(struct hists *hists, enum hist_column col) { @@ -1457,6 +1459,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, if (symbol_conf.sym_list == NULL) return; break; + case HIST_FILTER__PARALLELISM: + if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0) + return; + break; case HIST_FILTER__PARENT: case HIST_FILTER__GUEST: case HIST_FILTER__HOST: @@ -1515,6 +1521,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, perf_hpp__is_sym_entry); + hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM, + perf_hpp__is_parallelism_entry); + hists__apply_filters(he->hists, he); } @@ -1711,6 +1720,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) hists__filter_entry_by_thread(hists, he); hists__filter_entry_by_symbol(hists, he); 
hists__filter_entry_by_socket(hists, he); + hists__filter_entry_by_parallelism(hists, he); } int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) @@ -2197,6 +2207,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists, return false; } +static bool hists__filter_entry_by_parallelism(struct hists *hists, + struct hist_entry *he) +{ + if (test_bit(he->parallelism, hists->parallelism_filter)) { + he->filtered |= (1 << HIST_FILTER__PARALLELISM); + return true; + } + return false; +} + typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) @@ -2366,6 +2386,16 @@ void hists__filter_by_socket(struct hists *hists) hists__filter_entry_by_socket); } +void hists__filter_by_parallelism(struct hists *hists) +{ + if (symbol_conf.report_hierarchy) + hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM, + hists->parallelism_filter); + else + hists__filter_by_type(hists, HIST_FILTER__PARALLELISM, + hists__filter_entry_by_parallelism); +} + void events_stats__inc(struct events_stats *stats, u32 type) { ++stats->nr_events[0]; @@ -2872,6 +2902,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) hists->entries = RB_ROOT_CACHED; mutex_init(&hists->lock); hists->socket_filter = -1; + hists->parallelism_filter = symbol_conf.parallelism_filter; hists->hpp_list = hpp_list; INIT_LIST_HEAD(&hists->hpp_formats); return 0; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4035106a7408..c2236e0d89f2 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -31,6 +31,7 @@ enum hist_filter { HIST_FILTER__HOST, HIST_FILTER__SOCKET, HIST_FILTER__C2C, + HIST_FILTER__PARALLELISM, }; typedef u16 filter_mask_t; @@ -112,6 +113,7 @@ struct hists { const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; + unsigned long *parallelism_filter; struct mutex lock; struct hists_stats stats; u64 
event_stream; @@ -388,11 +390,13 @@ void hists__filter_by_dso(struct hists *hists); void hists__filter_by_thread(struct hists *hists); void hists__filter_by_symbol(struct hists *hists); void hists__filter_by_socket(struct hists *hists); +void hists__filter_by_parallelism(struct hists *hists); static inline bool hists__has_filter(struct hists *hists) { return hists->thread_filter || hists->dso_filter || - hists->symbol_filter_str || (hists->socket_filter > -1); + hists->symbol_filter_str || (hists->socket_filter > -1) || + hists->parallelism_filter; } u16 hists__col_len(struct hists *hists, enum hist_column col); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7eef43f5be36..3055496358eb 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -900,6 +900,16 @@ sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right) return right->parallelism - left->parallelism; } +static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg) +{ + const unsigned long *parallelism_filter = arg; + + if (type != HIST_FILTER__PARALLELISM) + return -1; + + return test_bit(he->parallelism, parallelism_filter); +} + static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, size_t size, unsigned int width) { @@ -909,6 +919,7 @@ static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, struct sort_entry sort_parallelism = { .se_header = "Parallelism", .se_cmp = sort__parallelism_cmp, + .se_filter = hist_entry__parallelism_filter, .se_snprintf = hist_entry__parallelism_snprintf, .se_width_idx = HISTC_PARALLELISM, }; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 49b08adc6ee3..315f74b5bac0 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -18,6 +18,7 @@ #include "annotate.h" #include "build-id.h" #include "cap.h" +#include "cpumap.h" #include "dso.h" #include "util.h" // lsdir() #include "debug.h" @@ -2471,6 +2472,36 @@ int 
symbol__annotation_init(void) return 0; } +static int setup_parallelism_bitmap(void) +{ + struct perf_cpu_map *map; + struct perf_cpu cpu; + int i, err = -1; + + if (symbol_conf.parallelism_list_str == NULL) + return 0; + + map = perf_cpu_map__new(symbol_conf.parallelism_list_str); + if (map == NULL) { + pr_err("failed to parse parallelism filter list\n"); + return -1; + } + + bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1); + perf_cpu_map__for_each_cpu(cpu, i, map) { + if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) { + pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu); + goto out_delete_map; + } + __clear_bit(cpu.cpu, symbol_conf.parallelism_filter); + } + + err = 0; +out_delete_map: + perf_cpu_map__put(map); + return err; +} + int symbol__init(struct perf_env *env) { const char *symfs; @@ -2490,6 +2521,9 @@ int symbol__init(struct perf_env *env) return -1; } + if (setup_parallelism_bitmap()) + return -1; + if (setup_list(&symbol_conf.dso_list, symbol_conf.dso_list_str, "dso") < 0) return -1; diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index a9c51acc722f..c5b2e56127e2 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -3,6 +3,8 @@ #define __PERF_SYMBOL_CONF 1 #include <stdbool.h> +#include <linux/bitmap.h> +#include "perf.h" struct strlist; struct intlist; @@ -62,6 +64,7 @@ struct symbol_conf { *pid_list_str, *tid_list_str, *sym_list_str, + *parallelism_list_str, *col_width_list_str, *bt_stop_list_str; const char *addr2line_path; @@ -82,6 +85,7 @@ struct symbol_conf { int pad_output_len_dso; int group_sort_idx; int addr_range; + DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1); }; extern struct symbol_conf symbol_conf; -- 2.50.1