Set the function name to get the histogram.  Unlike perf ftrace trace,
        it only allows a single function to calculate the histogram.
 
+-e::
+--events=::
+       Set the pair of events to get the histogram.  The histogram is calculated
+       from the time difference between the two events in the same thread.  This
+       requires the -b/--use-bpf option.
+
 -b::
 --use-bpf::
        Use BPF to measure function latency instead of using the ftrace (it
 
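For the new -e/--events option, a hypothetical invocation could look like the
following (the event names are placeholders for any pair of raw tracepoints
whose begin and end fire in the same thread; -b/--use-bpf is mandatory):

    # perf ftrace latency -b -e myprobe_begin,myprobe_end -- ./workload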
        }
 }
 
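+/*
+ * Parse a comma-separated list of event names (from -e/--events) into
+ * filter_entry nodes on the given list.  The requirement that exactly two
+ * events are given is checked later, when the BPF programs are prepared.
+ */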
+static int parse_filter_event(const struct option *opt, const char *str,
+                            int unset __maybe_unused)
+{
+       struct list_head *head = opt->value;
+       struct filter_entry *entry;
+       char *s, *p;
+       int ret = -ENOMEM;
+
+       s = strdup(str);
+       if (s == NULL)
+               return -ENOMEM;
+
+       while ((p = strsep(&s, ",")) != NULL) {
+               entry = malloc(sizeof(*entry) + strlen(p) + 1);
+               if (entry == NULL)
+                       goto out;
+
+               strcpy(entry->name, p);
+               list_add_tail(&entry->list, head);
+       }
+       ret = 0;
+
+out:
+       free(s);
+       return ret;
+}
+
 static int parse_buffer_size(const struct option *opt,
                             const char *str, int unset)
 {
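The malloc(sizeof(*entry) + strlen(p) + 1) plus strcpy(entry->name, p) pattern
above assumes that name is a flexible array member at the end of the entry.
A minimal sketch of the assumed layout (the real definition lives in perf's
ftrace header, outside this hunk):

    struct filter_entry {
            struct list_head        list;
            char                    name[];
    };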
        const struct option latency_options[] = {
        OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
                     "Show latency of given function", parse_filter_func),
+       OPT_CALLBACK('e', "events", &ftrace.event_pair, "event1,event2",
+                    "Show latency between the two events", parse_filter_event),
 #ifdef HAVE_BPF_SKEL
        OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
                    "Use BPF to measure function latency"),
        INIT_LIST_HEAD(&ftrace.notrace);
        INIT_LIST_HEAD(&ftrace.graph_funcs);
        INIT_LIST_HEAD(&ftrace.nograph_funcs);
+       INIT_LIST_HEAD(&ftrace.event_pair);
 
        signal(SIGINT, sig_handler);
        signal(SIGUSR1, sig_handler);
                cmd_func = __cmd_ftrace;
                break;
        case PERF_FTRACE_LATENCY:
-               if (list_empty(&ftrace.filters)) {
-                       pr_err("Should provide a function to measure\n");
+               if (list_empty(&ftrace.filters) && list_empty(&ftrace.event_pair)) {
+                       pr_err("Should provide a function or events to measure\n");
                        parse_options_usage(ftrace_usage, options, "T", 1);
+                       parse_options_usage(NULL, options, "e", 1);
+                       ret = -EINVAL;
+                       goto out_delete_filters;
+               }
+               if (!list_empty(&ftrace.filters) && !list_empty(&ftrace.event_pair)) {
+                       pr_err("Please specify either a function or events, not both\n");
+                       parse_options_usage(ftrace_usage, options, "T", 1);
+                       parse_options_usage(NULL, options, "e", 1);
+                       ret = -EINVAL;
+                       goto out_delete_filters;
+               }
+               if (!list_empty(&ftrace.event_pair) && !ftrace.target.use_bpf) {
+                       pr_err("Event processing needs BPF\n");
+                       parse_options_usage(ftrace_usage, options, "b", 1);
+                       parse_options_usage(NULL, options, "e", 1);
                        ret = -EINVAL;
                        goto out_delete_filters;
                }
        delete_filter_func(&ftrace.notrace);
        delete_filter_func(&ftrace.graph_funcs);
        delete_filter_func(&ftrace.nograph_funcs);
+       delete_filter_func(&ftrace.event_pair);
 
        return ret;
 }
 
 {
        int fd, err;
        int i, ncpus = 1, ntasks = 1;
-       struct filter_entry *func;
+       struct filter_entry *func = NULL;
 
-       if (!list_is_singular(&ftrace->filters)) {
-               pr_err("ERROR: %s target function(s).\n",
-                      list_empty(&ftrace->filters) ? "No" : "Too many");
-               return -1;
-       }
+       if (!list_empty(&ftrace->filters)) {
+               if (!list_is_singular(&ftrace->filters)) {
+                       pr_err("ERROR: Too many target functions.\n");
+                       return -1;
+               }
+               func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+       } else {
+               int count = 0;
+               struct list_head *pos;
 
-       func = list_first_entry(&ftrace->filters, struct filter_entry, list);
+               list_for_each(pos, &ftrace->event_pair)
+                       count++;
+
+               if (count != 2) {
+                       pr_err("ERROR: Needs two target events.\n");
+                       return -1;
+               }
+       }
 
        skel = func_latency_bpf__open();
        if (!skel) {
 
        skel->bss->min = INT64_MAX;
 
-       skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
-                                                           false, func->name);
-       if (IS_ERR(skel->links.func_begin)) {
-               pr_err("Failed to attach fentry program\n");
-               err = PTR_ERR(skel->links.func_begin);
-               goto out;
-       }
+       if (func) {
+               skel->links.func_begin = bpf_program__attach_kprobe(skel->progs.func_begin,
+                                                                   false, func->name);
+               if (IS_ERR(skel->links.func_begin)) {
+                       pr_err("Failed to attach fentry program\n");
+                       err = PTR_ERR(skel->links.func_begin);
+                       goto out;
+               }
 
-       skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
-                                                         true, func->name);
-       if (IS_ERR(skel->links.func_end)) {
-               pr_err("Failed to attach fexit program\n");
-               err = PTR_ERR(skel->links.func_end);
-               goto out;
+               skel->links.func_end = bpf_program__attach_kprobe(skel->progs.func_end,
+                                                                 true, func->name);
+               if (IS_ERR(skel->links.func_end)) {
+                       pr_err("Failed to attach fexit program\n");
+                       err = PTR_ERR(skel->links.func_end);
+                       goto out;
+               }
+       } else {
+               struct filter_entry *event;
+
+               event = list_first_entry(&ftrace->event_pair, struct filter_entry, list);
+
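+               /*
+                * The event name from -e is passed to libbpf verbatim;
+                * bpf_program__attach_raw_tracepoint() expects the bare
+                * raw tracepoint name (e.g. "sched_switch", not
+                * "sched:sched_switch").
+                */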
+               skel->links.event_begin = bpf_program__attach_raw_tracepoint(skel->progs.event_begin,
+                                                                            event->name);
+               if (IS_ERR(skel->links.event_begin)) {
+                       pr_err("Failed to attach first tracepoint program\n");
+                       err = PTR_ERR(skel->links.event_begin);
+                       goto out;
+               }
+
+               event = list_next_entry(event, list);
+
+               skel->links.event_end = bpf_program__attach_raw_tracepoint(skel->progs.event_end,
+                                                                            event->name);
+               if (IS_ERR(skel->links.event_end)) {
+                       pr_err("Failed to attach second tracepoint program\n");
+                       err = PTR_ERR(skel->links.event_end);
+                       goto out;
+               }
        }
 
        /* XXX: we don't actually use this fd - just for poll() */
 
 const volatile unsigned int max_latency;
 const volatile unsigned int bucket_num = NUM_BUCKET;
 
-SEC("kprobe/func")
-int BPF_PROG(func_begin)
+static bool can_record(void)
 {
-       __u64 key, now;
-
-       if (!enabled)
-               return 0;
-
-       key = bpf_get_current_pid_tgid();
-
        if (has_cpu) {
                __u32 cpu = bpf_get_smp_processor_id();
                __u8 *ok;
 
                ok = bpf_map_lookup_elem(&cpu_filter, &cpu);
                if (!ok)
-                       return 0;
+                       return false;
        }
 
        if (has_task) {
-               __u32 pid = key & 0xffffffff;
+               __u32 pid = bpf_get_current_pid_tgid();
                __u8 *ok;
 
                ok = bpf_map_lookup_elem(&task_filter, &pid);
                if (!ok)
-                       return 0;
+                       return false;
        }
+       return true;
+}
+
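+/*
+ * Turn one latency sample into a histogram bucket and update the summary
+ * counters.  With bucket_range == 0 the buckets are powers of two of the
+ * base unit (usec by default, nsec with use_nsec): e.g. a 2500 ns delta
+ * with cmp_base == 1000 exits the loop at key == 2, the [2us, 4us) bucket.
+ */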
+static void update_latency(__s64 delta)
+{
+       __u64 val = delta;
+       __u32 key = 0;
+       __u64 *hist;
+       __u64 cmp_base = use_nsec ? 1 : 1000;
+
+       if (delta < 0)
+               return;
 
+       if (bucket_range != 0) {
+               val = delta / cmp_base;
+
+               if (min_latency > 0) {
+                       if (val > min_latency)
+                               val -= min_latency;
+                       else
+                               goto do_lookup;
+               }
+
+               // Less than 1 unit (us or ns), or, in the future,
+               // than the min latency desired.
+               if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+                       key = val / bucket_range + 1;
+                       if (key >= bucket_num)
+                               key = bucket_num - 1;
+               }
+
+               goto do_lookup;
+       }
+       // calculate index using delta
+       for (key = 0; key < (bucket_num - 1); key++) {
+               if (delta < (cmp_base << key))
+                       break;
+       }
+
+do_lookup:
+       hist = bpf_map_lookup_elem(&latency, &key);
+       if (!hist)
+               return;
+
+       __sync_fetch_and_add(hist, 1);
+
+       __sync_fetch_and_add(&total, delta); // always in nsec
+       __sync_fetch_and_add(&count, 1);
+
+       if (delta > max)
+               max = delta;
+       if (delta < min)
+               min = delta;
+}
+
+SEC("kprobe/func")
+int BPF_PROG(func_begin)
+{
+       __u64 key, now;
+
+       if (!enabled || !can_record())
+               return 0;
+
+       key = bpf_get_current_pid_tgid();
        now = bpf_ktime_get_ns();
 
        // overwrite timestamp for nested functions
 {
        __u64 tid;
        __u64 *start;
-       __u64 cmp_base = use_nsec ? 1 : 1000;
 
        if (!enabled)
                return 0;
 
        tid = bpf_get_current_pid_tgid();

        start = bpf_map_lookup_elem(&functime, &tid);
        if (start) {
-               __s64 delta = bpf_ktime_get_ns() - *start;
-               __u64 val = delta;
-               __u32 key = 0;
-               __u64 *hist;
-
+               update_latency(bpf_ktime_get_ns() - *start);
                bpf_map_delete_elem(&functime, &tid);
+       }
 
-               if (delta < 0)
-                       return 0;
+       return 0;
+}
 
-               if (bucket_range != 0) {
-                       val = delta / cmp_base;
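+/*
+ * The event pair reuses the functime map keyed by pid_tgid, so the begin
+ * and end events only produce a sample when both fire in the same thread.
+ */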
+SEC("raw_tp")
+int BPF_PROG(event_begin)
+{
+       __u64 key, now;
 
-                       if (min_latency > 0) {
-                               if (val > min_latency)
-                                       val -= min_latency;
-                               else
-                                       goto do_lookup;
-                       }
+       if (!enabled || !can_record())
+               return 0;
 
-                       // Less than 1 unit (ms or ns), or, in the future,
-                       // than the min latency desired.
-                       if (val > 0) { // 1st entry: [ 1 unit .. bucket_range units )
-                               key = val / bucket_range + 1;
-                               if (key >= bucket_num)
-                                       key = bucket_num - 1;
-                       }
+       key = bpf_get_current_pid_tgid();
+       now = bpf_ktime_get_ns();
 
-                       goto do_lookup;
-               }
-               // calculate index using delta
-               for (key = 0; key < (bucket_num - 1); key++) {
-                       if (delta < (cmp_base << key))
-                               break;
-               }
+       // overwrite timestamp for nested events
+       bpf_map_update_elem(&functime, &key, &now, BPF_ANY);
+       return 0;
+}
 
-do_lookup:
-               hist = bpf_map_lookup_elem(&latency, &key);
-               if (!hist)
-                       return 0;
+SEC("raw_tp")
+int BPF_PROG(event_end)
+{
+       __u64 tid;
+       __u64 *start;
 
-               __sync_fetch_and_add(hist, 1);
+       if (!enabled)
+               return 0;
 
-               __sync_fetch_and_add(&total, delta); // always in nsec
-               __sync_fetch_and_add(&count, 1);
+       tid = bpf_get_current_pid_tgid();
 
-               if (delta > max)
-                       max = delta;
-               if (delta < min)
-                       min = delta;
+       start = bpf_map_lookup_elem(&functime, &tid);
+       if (start) {
+               update_latency(bpf_ktime_get_ns() - *start);
+               bpf_map_delete_elem(&functime, &tid);
        }
 
        return 0;
 
        struct list_head        notrace;
        struct list_head        graph_funcs;
        struct list_head        nograph_funcs;
+       struct list_head        event_pair;
        struct hashmap          *profile_hash;
        unsigned long           percpu_buffer_size;
        bool                    inherit;