From e8536dd47a98b5dbfac1dcdd2954b502bf656044 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 12 Nov 2024 15:12:12 -0300 Subject: [PATCH] perf ftrace latency: Introduce --bucket-range to ask for linear bucketing In addition to showing it exponentially, using log2() to figure out the histogram index, allow for showing it linearly: The preexisting mode, the default: # perf ftrace latency --use-nsec --use-bpf \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 1 ns | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 238 | # | 128 - 256 ns | 1704 | ########## | 256 - 512 ns | 672 | ### | 512 - 1024 ns | 4458 | ########################## | 1 - 2 us | 677 | #### | 2 - 4 us | 5 | | 4 - 8 us | 0 | | 8 - 16 us | 0 | | 16 - 32 us | 0 | | 32 - 64 us | 0 | | 64 - 128 us | 0 | | 128 - 256 us | 0 | | 256 - 512 us | 0 | | 512 - 1024 us | 0 | | 1 - ... ms | 0 | | # The new histogram mode: # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \ -T switch_mm_irqs_off -a sleep 2 # DURATION | COUNT | GRAPH | 0 - 1 ns | 0 | | 1 - 151 ns | 265 | # | 151 - 301 ns | 1797 | ########### | 301 - 451 ns | 258 | # | 451 - 601 ns | 289 | # | 601 - 751 ns | 2049 | ############# | 751 - 901 ns | 967 | ###### | 901 - 1051 ns | 513 | ### | 1.05 - 1.20 us | 114 | | 1.20 - 1.35 us | 559 | ### | 1.35 - 1.50 us | 189 | # | 1.50 - 1.65 us | 137 | | 1.65 - 1.80 us | 32 | | 1.80 - 1.95 us | 2 | | 1.95 - 2.10 us | 0 | | 2.10 - 2.25 us | 1 | | 2.25 - 2.40 us | 1 | | 2.40 - 2.55 us | 0 | | 2.55 - 2.70 us | 0 | | 2.70 - 2.85 us | 0 | | 2.85 - 3.00 us | 1 | | 3.00 - ... 
us | 4 | | # Co-developed-by: Gabriele Monaco Cc: Adrian Hunter Cc: Clark Williams Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org Signed-off-by: Gabriele Monaco Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-ftrace.txt | 3 + tools/perf/builtin-ftrace.c | 66 +++++++++++++++++---- tools/perf/util/bpf_ftrace.c | 2 + tools/perf/util/bpf_skel/func_latency.bpf.c | 14 +++++ tools/perf/util/ftrace.h | 1 + 5 files changed, 73 insertions(+), 13 deletions(-) diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index eaec8253be68..e8cc8208e29f 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency' --use-nsec:: Use nano-second instead of micro-second as a base unit of the histogram. +--bucket-range=:: + Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode. + OPTIONS for 'perf ftrace profile' --------------------------------- diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 88b9f0597b92..e047e5dcda26 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[], if (ftrace->use_nsec) num *= 1000; - i = log2(num); - if (i < 0) + if (!ftrace->bucket_range) { + i = log2(num); + if (i < 0) + i = 0; + } else { + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. i = 0; + if (num > 0) // 1st entry: [ 1 unit .. 
bucket_range units ] + i = num / ftrace->bucket_range + 1; + } if (i >= NUM_BUCKET) i = NUM_BUCKET - 1; @@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[]) " DURATION ", "COUNT", bar_total, "GRAPH"); bar_len = buckets[0] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", + + printf(" %4d - %4d %s | %10d | %.*s%*s |\n", 0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); for (i = 1; i < NUM_BUCKET - 1; i++) { - int start = (1 << (i - 1)); - int stop = 1 << i; + int start, stop; const char *unit = use_nsec ? "ns" : "us"; - if (start >= 1024) { - start >>= 10; - stop >>= 10; - unit = use_nsec ? "us" : "ms"; + if (!ftrace->bucket_range) { + start = (1 << (i - 1)); + stop = 1 << i; + + if (start >= 1024) { + start >>= 10; + stop >>= 10; + unit = use_nsec ? "us" : "ms"; + } + } else { + start = (i - 1) * ftrace->bucket_range + 1; + stop = i * ftrace->bucket_range + 1; + + if (start >= 1000) { + double dstart = start / 1000.0, + dstop = stop / 1000.0; + printf(" %4.2f - %-4.2f", dstart, dstop); + unit = use_nsec ? "us" : "ms"; + goto print_bucket_info; + } } + + printf(" %4d - %4d", start, stop); +print_bucket_info: bar_len = buckets[i] * bar_total / total; - printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - start, stop, unit, buckets[i], bar_len, bar, + printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar, bar_total - bar_len, ""); } bar_len = buckets[NUM_BUCKET - 1] * bar_total / total; - printf(" %4d - %-4s %s | %10d | %.*s%*s |\n", - 1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1], + if (!ftrace->bucket_range) { + printf(" %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s "); + } else { + int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range; + + if (upper_outlier >= 1000) { + double dstart = upper_outlier / 1000.0; + + printf(" %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms"); + } else { + printf(" %4d - %4s %s", upper_outlier, "...", use_nsec ? 
"ns" : "us"); + } + } + printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1], bar_len, bar, bar_total - bar_len, ""); } @@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv) #endif OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec, "Use nano-second histogram"), + OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range, + "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"), OPT_PARENT(common_options), }; const struct option profile_options[] = { diff --git a/tools/perf/util/bpf_ftrace.c b/tools/perf/util/bpf_ftrace.c index 06d1c4018407..b3cb68295e56 100644 --- a/tools/perf/util/bpf_ftrace.c +++ b/tools/perf/util/bpf_ftrace.c @@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace) return -1; } + skel->rodata->bucket_range = ftrace->bucket_range; + /* don't need to set cpu filter for system-wide mode */ if (ftrace->target.cpu_list) { ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus); diff --git a/tools/perf/util/bpf_skel/func_latency.bpf.c b/tools/perf/util/bpf_skel/func_latency.bpf.c index f613dc9cb123..00a340ca1543 100644 --- a/tools/perf/util/bpf_skel/func_latency.bpf.c +++ b/tools/perf/util/bpf_skel/func_latency.bpf.c @@ -41,6 +41,7 @@ int enabled = 0; const volatile int has_cpu = 0; const volatile int has_task = 0; const volatile int use_nsec = 0; +const volatile unsigned int bucket_range; SEC("kprobe/func") int BPF_PROG(func_begin) @@ -100,12 +101,25 @@ int BPF_PROG(func_end) if (delta < 0) return 0; + if (bucket_range != 0) { + delta /= cmp_base; + // Less than 1 unit (ms or ns), or, in the future, + // than the min latency desired. + key = 0; + if (delta > 0) { // 1st entry: [ 1 unit .. 
bucket_range units ) + key = delta / bucket_range + 1; + if (key >= NUM_BUCKET) + key = NUM_BUCKET - 1; + } + goto do_lookup; + } // calculate index using delta for (key = 0; key < (NUM_BUCKET - 1); key++) { if (delta < (cmp_base << key)) break; } +do_lookup: hist = bpf_map_lookup_elem(&latency, &key); if (!hist) return 0; diff --git a/tools/perf/util/ftrace.h b/tools/perf/util/ftrace.h index bae649ef50e8..6ac136484349 100644 --- a/tools/perf/util/ftrace.h +++ b/tools/perf/util/ftrace.h @@ -20,6 +20,7 @@ struct perf_ftrace { unsigned long percpu_buffer_size; bool inherit; bool use_nsec; + unsigned int bucket_range; int graph_depth; int func_stack_trace; int func_irq_info; -- 2.50.1