From 8b3b1bb3ea1f930d44d79dfb0b8cb7d62db08ed6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Sep 2024 13:05:15 -0700 Subject: [PATCH] perf record offcpu: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that they can be optimized by the BPF core. Committer testing: root@x1:~# perf record --off-cpu ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 1.807 MB perf.data (5645 samples) ] root@x1:~# perf evlist cpu_atom/cycles/P cpu_core/cycles/P offcpu-time dummy:u root@x1:~# perf evlist -v cpu_atom/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0xa00000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1 cpu_core/cycles/P: type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x400000000, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1 offcpu-time: type: 1 (software), size: 136, config: 0xa (PERF_COUNT_SW_BPF_OUTPUT), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CALLCHAIN|CPU|PERIOD|IDENTIFIER, read_format: ID|LOST, disabled: 1, inherit: 1, freq: 1, sample_id_all: 1 dummy:u: type: 1 (software), size: 136, config: 0x9 (PERF_COUNT_SW_DUMMY), { sample_period, sample_freq }: 1, sample_type: IP|TID|TIME|CPU|IDENTIFIER, read_format: ID|LOST, inherit: 1, exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1, ksymbol: 1, bpf_event: 1 root@x1:~# perf trace -e bpf --max-events 5 perf record --off-cpu 0.000 ( 0.015 ms): :2949124/2949124 bpf(cmd: 36, uattr: 0x7ffefc6dbe30, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.031 ( 0.115 ms): :2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbb60, size: 148) = 14 0.159 ( 0.037 ms): 
:2949124/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbc20, size: 148) = 14 23.868 ( 0.144 ms): perf/2949124 bpf(cmd: PROG_LOAD, uattr: 0x7ffefc6dbad0, size: 148) = 14 24.027 ( 0.014 ms): perf/2949124 bpf(uattr: 0x7ffefc6dbc80, size: 80) = 14 root@x1:~# Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Song Liu Link: https://lore.kernel.org/r/20240902200515.2103769-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_off_cpu.c | 16 ++++++++-------- tools/perf/util/bpf_skel/off_cpu.bpf.c | 9 +++++---- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 6af36142dc5a..a590a8ac1f9d 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -73,14 +73,12 @@ static void off_cpu_start(void *arg) struct evlist *evlist = arg; /* update task filter for the given workload */ - if (!skel->bss->has_cpu && !skel->bss->has_task && + if (skel->rodata->has_task && skel->rodata->uses_tgid && perf_thread_map__pid(evlist->core.threads, 0) != -1) { int fd; u32 pid; u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); pid = perf_thread_map__pid(evlist->core.threads, 0); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); @@ -148,6 +146,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->cpu_list) { ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); + skel->rodata->has_cpu = 1; } if (target->pid) { @@ -173,11 +172,16 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, ntasks = MAX_PROC; bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } else if (target__has_task(target)) { ntasks = 
perf_thread_map__nr(evlist->core.threads); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); + skel->rodata->has_task = 1; } else if (target__none(target)) { bpf_map__set_max_entries(skel->maps.task_filter, MAX_PROC); + skel->rodata->has_task = 1; + skel->rodata->uses_tgid = 1; } if (evlist__first(evlist)->cgrp) { @@ -186,6 +190,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (!cgroup_is_v2("perf_event")) skel->rodata->uses_cgroup_v1 = true; + skel->rodata->has_cgroup = 1; } if (opts->record_cgroup) { @@ -208,7 +213,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -220,8 +224,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, if (target->pid) { u8 val = 1; - skel->bss->has_task = 1; - skel->bss->uses_tgid = 1; fd = bpf_map__fd(skel->maps.task_filter); strlist__for_each_entry(pos, pid_slist) { @@ -240,7 +242,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -253,7 +254,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, struct evsel *evsel; u8 val = 1; - skel->bss->has_cgroup = 1; fd = bpf_map__fd(skel->maps.cgroup_filter); evlist__for_each_entry(evlist, evsel) { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index d877a0a9731f..c152116df72f 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -85,10 +85,11 @@ struct task_struct___old { } __attribute__((preserve_access_index)); int enabled = 0; -int has_cpu = 0; -int has_task = 0; -int has_cgroup = 0; -int uses_tgid = 0; + +const volatile int has_cpu = 0; +const volatile int has_task = 0; +const volatile int has_cgroup = 0; +const volatile int uses_tgid = 0; const 
volatile bool has_prev_state = false; const volatile bool needs_cgroup = false; -- 2.50.1