www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
perf report: Add parallelism filter
author Dmitry Vyukov <dvyukov@google.com>
Thu, 13 Feb 2025 09:08:17 +0000 (10:08 +0100)
committer Namhyung Kim <namhyung@kernel.org>
Tue, 18 Feb 2025 22:04:32 +0000 (14:04 -0800)
Add a parallelism filter that can be used to look at specific parallelism
levels only. The format is the same as for CPU lists. For example:

Only single-threaded samples: --parallelism=1
Low parallelism only: --parallelism=1-4
High parallelism only: --parallelism=64-128

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/e61348985ff0a6a14b07c39e880edbd60a8f8635.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/builtin-report.c
tools/perf/util/event.c
tools/perf/util/hist.c
tools/perf/util/hist.h
tools/perf/util/sort.c
tools/perf/util/symbol.c
tools/perf/util/symbol_conf.h

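diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 14d49f062588131eb616e72f1bfeb6b323ee3bf2..2a19abdc869a179c21c577e8cfb33e29390bcf2d 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c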
@@ -1390,6 +1390,8 @@ int cmd_report(int argc, const char **argv)
                     symbol__config_symfs),
        OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
                   "list of cpus to profile"),
+       OPT_STRING(0, "parallelism", &symbol_conf.parallelism_list_str, "parallelism",
+                  "only consider these parallelism levels (cpu set format)"),
        OPT_BOOLEAN('I', "show-info", &report.show_full_info,
                    "Display extended information about perf.data file"),
        OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src,
@@ -1721,7 +1723,8 @@ repeat:
        }
 
        if (report.disable_order || !perf_session__has_switch_events(session)) {
-               if ((sort_order && strstr(sort_order, "parallelism")) ||
+               if (symbol_conf.parallelism_list_str ||
+                               (sort_order && strstr(sort_order, "parallelism")) ||
                                (field_order && strstr(field_order, "parallelism"))) {
                        if (report.disable_order)
                                ui__error("Use of parallelism is incompatible with --disable-order.\n");
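diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 2f10e3115757299f6e09327510e57058f18d8408..6ceed46acd5a49b8f2ce4fee8ccfc5a8596274d6 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c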
@@ -769,6 +769,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
 
        /* Account for possible out-of-order switch events. */
        al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+       if (test_bit(al->parallelism, symbol_conf.parallelism_filter))
+               al->filtered |= (1 << HIST_FILTER__PARALLELISM);
 
        if (al->map) {
                if (symbol_conf.dso_list &&
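diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6b8f8da8d3b66223f4c13172ac7074d7b5a9fdfa..446342246f5ee6af5488657cd2dd1cc62ad24db6 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c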
@@ -43,6 +43,8 @@ static bool hists__filter_entry_by_symbol(struct hists *hists,
                                          struct hist_entry *he);
 static bool hists__filter_entry_by_socket(struct hists *hists,
                                          struct hist_entry *he);
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+                                              struct hist_entry *he);
 
 u16 hists__col_len(struct hists *hists, enum hist_column col)
 {
@@ -1457,6 +1459,10 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he,
                if (symbol_conf.sym_list == NULL)
                        return;
                break;
+       case HIST_FILTER__PARALLELISM:
+               if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0)
+                       return;
+               break;
        case HIST_FILTER__PARENT:
        case HIST_FILTER__GUEST:
        case HIST_FILTER__HOST:
@@ -1515,6 +1521,9 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
        hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
                                            perf_hpp__is_sym_entry);
 
+       hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM,
+                                           perf_hpp__is_parallelism_entry);
+
        hists__apply_filters(he->hists, he);
 }
 
@@ -1711,6 +1720,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
        hists__filter_entry_by_thread(hists, he);
        hists__filter_entry_by_symbol(hists, he);
        hists__filter_entry_by_socket(hists, he);
+       hists__filter_entry_by_parallelism(hists, he);
 }
 
 int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
@@ -2197,6 +2207,16 @@ static bool hists__filter_entry_by_socket(struct hists *hists,
        return false;
 }
 
+static bool hists__filter_entry_by_parallelism(struct hists *hists,
+                                              struct hist_entry *he)
+{
+       if (test_bit(he->parallelism, hists->parallelism_filter)) {
+               he->filtered |= (1 << HIST_FILTER__PARALLELISM);
+               return true;
+       }
+       return false;
+}
+
 typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
 
 static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
@@ -2366,6 +2386,16 @@ void hists__filter_by_socket(struct hists *hists)
                                      hists__filter_entry_by_socket);
 }
 
+void hists__filter_by_parallelism(struct hists *hists)
+{
+       if (symbol_conf.report_hierarchy)
+               hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM,
+                                       hists->parallelism_filter);
+       else
+               hists__filter_by_type(hists, HIST_FILTER__PARALLELISM,
+                                     hists__filter_entry_by_parallelism);
+}
+
 void events_stats__inc(struct events_stats *stats, u32 type)
 {
        ++stats->nr_events[0];
@@ -2872,6 +2902,7 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
        hists->entries = RB_ROOT_CACHED;
        mutex_init(&hists->lock);
        hists->socket_filter = -1;
+       hists->parallelism_filter = symbol_conf.parallelism_filter;
        hists->hpp_list = hpp_list;
        INIT_LIST_HEAD(&hists->hpp_formats);
        return 0;
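diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 4035106a74087356e35b2f9f1b02e2939c6629f9..c2236e0d89f2a67941f2bbf7dbc31777a65d61dc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h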
@@ -31,6 +31,7 @@ enum hist_filter {
        HIST_FILTER__HOST,
        HIST_FILTER__SOCKET,
        HIST_FILTER__C2C,
+       HIST_FILTER__PARALLELISM,
 };
 
 typedef u16 filter_mask_t;
@@ -112,6 +113,7 @@ struct hists {
        const struct dso        *dso_filter;
        const char              *uid_filter_str;
        const char              *symbol_filter_str;
+       unsigned long           *parallelism_filter;
        struct mutex            lock;
        struct hists_stats      stats;
        u64                     event_stream;
@@ -388,11 +390,13 @@ void hists__filter_by_dso(struct hists *hists);
 void hists__filter_by_thread(struct hists *hists);
 void hists__filter_by_symbol(struct hists *hists);
 void hists__filter_by_socket(struct hists *hists);
+void hists__filter_by_parallelism(struct hists *hists);
 
 static inline bool hists__has_filter(struct hists *hists)
 {
        return hists->thread_filter || hists->dso_filter ||
-               hists->symbol_filter_str || (hists->socket_filter > -1);
+               hists->symbol_filter_str || (hists->socket_filter > -1) ||
+               hists->parallelism_filter;
 }
 
 u16 hists__col_len(struct hists *hists, enum hist_column col);
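diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 7eef43f5be3604b51ff41e7ed6e26e145679f9ec..3055496358ebb182506c01b22e854e1b577dc3b2 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c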
@@ -900,6 +900,16 @@ sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
        return right->parallelism - left->parallelism;
 }
 
+static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg)
+{
+       const unsigned long *parallelism_filter = arg;
+
+       if (type != HIST_FILTER__PARALLELISM)
+               return -1;
+
+       return test_bit(he->parallelism, parallelism_filter);
+}
+
 static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
                                    size_t size, unsigned int width)
 {
@@ -909,6 +919,7 @@ static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
 struct sort_entry sort_parallelism = {
        .se_header      = "Parallelism",
        .se_cmp         = sort__parallelism_cmp,
+       .se_filter      = hist_entry__parallelism_filter,
        .se_snprintf    = hist_entry__parallelism_snprintf,
        .se_width_idx   = HISTC_PARALLELISM,
 };
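diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 49b08adc6ee343650b5022b4ac88add2b113250d..315f74b5bac0609b20a60ea4237c6e04e4697647 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c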
@@ -18,6 +18,7 @@
 #include "annotate.h"
 #include "build-id.h"
 #include "cap.h"
+#include "cpumap.h"
 #include "dso.h"
 #include "util.h" // lsdir()
 #include "debug.h"
@@ -2471,6 +2472,36 @@ int symbol__annotation_init(void)
        return 0;
 }
 
+static int setup_parallelism_bitmap(void)
+{
+       struct perf_cpu_map *map;
+       struct perf_cpu cpu;
+       int i, err = -1;
+
+       if (symbol_conf.parallelism_list_str == NULL)
+               return 0;
+
+       map = perf_cpu_map__new(symbol_conf.parallelism_list_str);
+       if (map == NULL) {
+               pr_err("failed to parse parallelism filter list\n");
+               return -1;
+       }
+
+       bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1);
+       perf_cpu_map__for_each_cpu(cpu, i, map) {
+               if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) {
+                       pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu);
+                       goto out_delete_map;
+               }
+               __clear_bit(cpu.cpu, symbol_conf.parallelism_filter);
+       }
+
+       err = 0;
+out_delete_map:
+       perf_cpu_map__put(map);
+       return err;
+}
+
 int symbol__init(struct perf_env *env)
 {
        const char *symfs;
@@ -2490,6 +2521,9 @@ int symbol__init(struct perf_env *env)
                return -1;
        }
 
+       if (setup_parallelism_bitmap())
+               return -1;
+
        if (setup_list(&symbol_conf.dso_list,
                       symbol_conf.dso_list_str, "dso") < 0)
                return -1;
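diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index a9c51acc722fed385c8d9ae1e80831f44754a17a..c5b2e56127e228d80368f3f88c33ed00bb3c2e47 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h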
@@ -3,6 +3,8 @@
 #define __PERF_SYMBOL_CONF 1
 
 #include <stdbool.h>
+#include <linux/bitmap.h>
+#include "perf.h"
 
 struct strlist;
 struct intlist;
@@ -62,6 +64,7 @@ struct symbol_conf {
                        *pid_list_str,
                        *tid_list_str,
                        *sym_list_str,
+                       *parallelism_list_str,
                        *col_width_list_str,
                        *bt_stop_list_str;
        const char              *addr2line_path;
@@ -82,6 +85,7 @@ struct symbol_conf {
        int             pad_output_len_dso;
        int             group_sort_idx;
        int             addr_range;
+       DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1);
 };
 
 extern struct symbol_conf symbol_conf;