]> www.infradead.org Git - users/dwmw2/linux.git/commitdiff
perf ftrace latency: Introduce --bucket-range to ask for linear bucketing
authorArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 12 Nov 2024 18:12:12 +0000 (15:12 -0300)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 10 Dec 2024 18:16:01 +0000 (15:16 -0300)
In addition to showing it exponentially, using log2() to figure out the
histogram index, allow for showing it linearly:

The preexisting mode, the default:

  # perf ftrace latency --use-nsec --use-bpf \
   -T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -    2 ns |          0 |                                         |
       2 -    4 ns |          0 |                                         |
       4 -    8 ns |          0 |                                         |
       8 -   16 ns |          0 |                                         |
      16 -   32 ns |          0 |                                         |
      32 -   64 ns |          0 |                                         |
      64 -  128 ns |        238 | #                                       |
     128 -  256 ns |       1704 | ##########                              |
     256 -  512 ns |        672 | ###                                     |
     512 - 1024 ns |       4458 | ##########################              |
       1 -    2 us |        677 | ####                                    |
       2 -    4 us |          5 |                                         |
       4 -    8 us |          0 |                                         |
       8 -   16 us |          0 |                                         |
      16 -   32 us |          0 |                                         |
      32 -   64 us |          0 |                                         |
      64 -  128 us |          0 |                                         |
     128 -  256 us |          0 |                                         |
     256 -  512 us |          0 |                                         |
     512 - 1024 us |          0 |                                         |
       1 - ...  ms |          0 |                                         |
  #

The new histogram mode:

  # perf ftrace latency --bucket-range=150 --use-nsec --use-bpf \
   -T switch_mm_irqs_off -a sleep 2
  #   DURATION     |      COUNT | GRAPH                                   |
       0 -    1 ns |          0 |                                         |
       1 -  151 ns |        265 | #                                       |
     151 -  301 ns |       1797 | ###########                             |
     301 -  451 ns |        258 | #                                       |
     451 -  601 ns |        289 | #                                       |
     601 -  751 ns |       2049 | #############                           |
     751 -  901 ns |        967 | ######                                  |
     901 - 1051 ns |        513 | ###                                     |
    1.05 - 1.20 us |        114 |                                         |
    1.20 - 1.35 us |        559 | ###                                     |
    1.35 - 1.50 us |        189 | #                                       |
    1.50 - 1.65 us |        137 |                                         |
    1.65 - 1.80 us |         32 |                                         |
    1.80 - 1.95 us |          2 |                                         |
    1.95 - 2.10 us |          0 |                                         |
    2.10 - 2.25 us |          1 |                                         |
    2.25 - 2.40 us |          1 |                                         |
    2.40 - 2.55 us |          0 |                                         |
    2.55 - 2.70 us |          0 |                                         |
    2.70 - 2.85 us |          0 |                                         |
    2.85 - 3.00 us |          1 |                                         |
    3.00 - ...  us |          4 |                                         |
  #

Co-developed-by: Gabriele Monaco <gmonaco@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20241112181214.1171244-3-acme@kernel.org
Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-ftrace.txt
tools/perf/builtin-ftrace.c
tools/perf/util/bpf_ftrace.c
tools/perf/util/bpf_skel/func_latency.bpf.c
tools/perf/util/ftrace.h

index eaec8253be681a0e3c6148ed2bb83b387f5ec18c..e8cc8208e29fca7eb080ae6b51d4bb5d52162817 100644 (file)
@@ -148,6 +148,9 @@ OPTIONS for 'perf ftrace latency'
 --use-nsec::
        Use nano-second instead of micro-second as a base unit of the histogram.
 
+--bucket-range=::
+       Bucket range in us or ns (according to -n/--use-nsec), default is log2() mode.
+
 
 OPTIONS for 'perf ftrace profile'
 ---------------------------------
index 88b9f0597b925c69d9954a24ebcb8802043608e4..e047e5dcda2656df0ff66619a5bfd3d65728f854 100644 (file)
@@ -777,9 +777,17 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
                if (ftrace->use_nsec)
                        num *= 1000;
 
-               i = log2(num);
-               if (i < 0)
+               if (!ftrace->bucket_range) {
+                       i = log2(num);
+                       if (i < 0)
+                               i = 0;
+               } else {
+                       // Less than 1 unit (ms or ns), or, in the future,
+                       // than the min latency desired.
                        i = 0;
+                       if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
+                               i = num / ftrace->bucket_range + 1;
+               }
                if (i >= NUM_BUCKET)
                        i = NUM_BUCKET - 1;
 
@@ -815,28 +823,58 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
               "  DURATION    ", "COUNT", bar_total, "GRAPH");
 
        bar_len = buckets[0] * bar_total / total;
-       printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
+
+       printf("  %4d - %4d %s | %10d | %.*s%*s |\n",
               0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
 
        for (i = 1; i < NUM_BUCKET - 1; i++) {
-               int start = (1 << (i - 1));
-               int stop = 1 << i;
+               int start, stop;
                const char *unit = use_nsec ? "ns" : "us";
 
-               if (start >= 1024) {
-                       start >>= 10;
-                       stop >>= 10;
-                       unit = use_nsec ? "us" : "ms";
+               if (!ftrace->bucket_range) {
+                       start = (1 << (i - 1));
+                       stop  = 1 << i;
+
+                       if (start >= 1024) {
+                               start >>= 10;
+                               stop >>= 10;
+                               unit = use_nsec ? "us" : "ms";
+                       }
+               } else {
+                       start = (i - 1) * ftrace->bucket_range + 1;
+                       stop  = i * ftrace->bucket_range + 1;
+
+                       if (start >= 1000) {
+                               double dstart = start / 1000.0,
+                                      dstop  = stop / 1000.0;
+                               printf("  %4.2f - %-4.2f", dstart, dstop);
+                               unit = use_nsec ? "us" : "ms";
+                               goto print_bucket_info;
+                       }
                }
+
+               printf("  %4d - %4d", start, stop);
+print_bucket_info:
                bar_len = buckets[i] * bar_total / total;
-               printf("  %4d - %-4d %s | %10d | %.*s%*s |\n",
-                      start, stop, unit, buckets[i], bar_len, bar,
+               printf(" %s | %10d | %.*s%*s |\n", unit, buckets[i], bar_len, bar,
                       bar_total - bar_len, "");
        }
 
        bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
-       printf("  %4d - %-4s %s | %10d | %.*s%*s |\n",
-              1, "...", use_nsec ? "ms" : " s", buckets[NUM_BUCKET - 1],
+       if (!ftrace->bucket_range) {
+               printf("  %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
+       } else {
+               int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range;
+
+               if (upper_outlier >= 1000) {
+                       double dstart = upper_outlier / 1000.0;
+
+                       printf("  %4.2f - %-4s %s", dstart, "...", use_nsec ? "us" : "ms");
+               } else {
+                       printf("  %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
+               }
+       }
+       printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
               bar_len, bar, bar_total - bar_len, "");
 
 }
@@ -1558,6 +1596,8 @@ int cmd_ftrace(int argc, const char **argv)
 #endif
        OPT_BOOLEAN('n', "use-nsec", &ftrace.use_nsec,
                    "Use nano-second histogram"),
+       OPT_UINTEGER(0, "bucket-range", &ftrace.bucket_range,
+                   "Bucket range in ms or ns (-n/--use-nsec), default is log2() mode"),
        OPT_PARENT(common_options),
        };
        const struct option profile_options[] = {
index 06d1c4018407a26590e82c389376da4001a85329..b3cb68295e56631cb50311a90db9aafd1962138b 100644 (file)
@@ -36,6 +36,8 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
                return -1;
        }
 
+       skel->rodata->bucket_range = ftrace->bucket_range;
+
        /* don't need to set cpu filter for system-wide mode */
        if (ftrace->target.cpu_list) {
                ncpus = perf_cpu_map__nr(ftrace->evlist->core.user_requested_cpus);
index f613dc9cb123480cf64493ec2b23f389415da3a9..00a340ca1543dff0983b95883c395b75a6267d13 100644 (file)
@@ -41,6 +41,7 @@ int enabled = 0;
 const volatile int has_cpu = 0;
 const volatile int has_task = 0;
 const volatile int use_nsec = 0;
+const volatile unsigned int bucket_range;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -100,12 +101,25 @@ int BPF_PROG(func_end)
                if (delta < 0)
                        return 0;
 
+               if (bucket_range != 0) {
+                       delta /= cmp_base;
+                       // Less than 1 unit (ms or ns), or, in the future,
+                       // than the min latency desired.
+                       key = 0;
+                       if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
+                               key = delta / bucket_range + 1;
+                               if (key >= NUM_BUCKET)
+                                       key = NUM_BUCKET - 1;
+                       }
+                       goto do_lookup;
+               }
                // calculate index using delta
                for (key = 0; key < (NUM_BUCKET - 1); key++) {
                        if (delta < (cmp_base << key))
                                break;
                }
 
+do_lookup:
                hist = bpf_map_lookup_elem(&latency, &key);
                if (!hist)
                        return 0;
index bae649ef50e8447ab5a612e4007f4b18e640b7d4..6ac136484349a9a5c7daf076f530e710e38c89ef 100644 (file)
@@ -20,6 +20,7 @@ struct perf_ftrace {
        unsigned long           percpu_buffer_size;
        bool                    inherit;
        bool                    use_nsec;
+       unsigned int            bucket_range;
        int                     graph_depth;
        int                     func_stack_trace;
        int                     func_irq_info;