www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
perf report: Add machine parallelism
author Dmitry Vyukov <dvyukov@google.com>
Thu, 13 Feb 2025 09:08:14 +0000 (10:08 +0100)
committer Namhyung Kim <namhyung@kernel.org>
Tue, 18 Feb 2025 06:00:50 +0000 (22:00 -0800)
Add calculation of the current parallelism level (number of threads actively
running on CPUs). The parallelism level can be shown in reports on its own,
and can also be used to calculate latency overheads.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/0f8c1b8eb12619029e31b3d5c0346f4616a5aeda.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/builtin-report.c
tools/perf/util/addr_location.c
tools/perf/util/addr_location.h
tools/perf/util/event.c
tools/perf/util/machine.c
tools/perf/util/machine.h

index f5fbd670d619a32ac320979c9963c47cb3b50670..0d9bd090eda71f15ad79333a55b4e75cc4af5836 100644 (file)
@@ -1568,6 +1568,7 @@ repeat:
        report.tool.cgroup               = perf_event__process_cgroup;
        report.tool.exit                 = perf_event__process_exit;
        report.tool.fork                 = perf_event__process_fork;
+       report.tool.context_switch       = perf_event__process_switch;
        report.tool.lost                 = perf_event__process_lost;
        report.tool.read                 = process_read_event;
        report.tool.attr                 = process_attr;
index 51825ef8c0ab77a701414ba5b2ea0426dc511dc0..007a2f5df9a6a13364adaabf9243ed5f3b2b5df9 100644 (file)
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
        al->cpumode = 0;
        al->cpu = 0;
        al->socket = 0;
+       al->parallelism = 1;
 }
 
 /*
index d8ac0428dff232ec9ac4a7e5bbb3cec6fdf7e989..36aaa45445f24cbf000ce7a1176f2dc9ae4483a0 100644 (file)
@@ -21,6 +21,8 @@ struct addr_location {
        u8            cpumode;
        s32           cpu;
        s32           socket;
+       /* Same as machine.parallelism but within [1, nr_cpus]. */
+       int           parallelism;
 };
 
 void addr_location__init(struct addr_location *al);
index aac96d5d19170091438a4c9a063faec360934361..2f10e3115757299f6e09327510e57058f18d8408 100644 (file)
@@ -767,6 +767,9 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
                        al->socket = env->cpu[al->cpu].socket_id;
        }
 
+       /* Account for possible out-of-order switch events. */
+       al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+
        if (al->map) {
                if (symbol_conf.dso_list &&
                    (!dso || !(strlist__has_entry(symbol_conf.dso_list,
index 55d4977b99137ae526b2fb1b3594d67b469537c1..d96cbfd97ad8fb4da3d3e42d0aec341101242c4a 100644 (file)
@@ -94,6 +94,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
        machine->comm_exec = false;
        machine->kernel_start = 0;
        machine->vmlinux_map = NULL;
+       /* There is no initial context switch in, so we start at 1. */
+       machine->parallelism = 1;
 
        machine->root_dir = strdup(root_dir);
        if (machine->root_dir == NULL)
@@ -677,8 +679,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
 int machine__process_switch_event(struct machine *machine __maybe_unused,
                                  union perf_event *event)
 {
+       bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
        if (dump_trace)
                perf_event__fprintf_switch(event, stdout);
+       machine->parallelism += out ? -1 : 1;
        return 0;
 }
 
@@ -1880,6 +1885,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
        if (dump_trace)
                perf_event__fprintf_task(event, stdout);
 
+       /* There is no context switch out before exit, so we decrement here. */
+       machine->parallelism--;
        if (thread != NULL) {
                if (symbol_conf.keep_exited_threads)
                        thread__set_exited(thread, /*exited=*/true);
index ae3e5542d57df26b0aaca63ec570f084ddafcb90..b56abec84fed1e3f1e92b6e221ef7b5d96f6624c 100644 (file)
@@ -50,6 +50,12 @@ struct machine {
                u64       text_start;
                u64       text_end;
        } sched, lock, traceiter, trace;
+       /*
+        * The current parallelism level (number of threads that run on CPUs).
+        * This value can be less than 1, or larger than the total number
+        * of CPUs, if events are poorly ordered.
+        */
+       int               parallelism;
        pid_t             *current_tid;
        size_t            current_tid_sz;
        union { /* Tool specific area */