www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
perf ftrace latency: variable histogram buckets
author: Gabriele Monaco <gmonaco@redhat.com>
Fri, 7 Feb 2025 08:04:44 +0000 (09:04 +0100)
committer: Namhyung Kim <namhyung@kernel.org>
Wed, 26 Feb 2025 21:48:02 +0000 (13:48 -0800)
The max-latency value can make the histogram smaller, but not larger: we
have a maximum of 22 buckets, and specifying a max-latency that would
require more buckets has no effect.

Dynamically allocate the buckets and compute the bucket number from the
max latency as (max-min) / range + 2

If the maximum is not specified, we still set the bucket number to 22
and compute the maximum accordingly.

Fail if the maximum is smaller than min+range; this way we make sure we
always have at least 3 buckets: those below min, those above max and at
least one in the middle.

Since max-latency is not available in log2 mode, always use 22 buckets.

Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
Link: https://lore.kernel.org/r/20250207080446.77630-1-gmonaco@redhat.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/builtin-ftrace.c
tools/perf/util/bpf_ftrace.c
tools/perf/util/bpf_skel/func_latency.bpf.c
tools/perf/util/ftrace.h

index cfd770ec72867d775c0e19932166e49f69d6fca0..4f76094ea06d40c7ad63f960161f84993e28f693 100644 (file)
@@ -733,6 +733,7 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
 {
        int min_latency = ftrace->min_latency;
        int max_latency = ftrace->max_latency;
+       unsigned int bucket_num = ftrace->bucket_num;
        char *p, *q;
        char *unit;
        double num;
@@ -797,10 +798,10 @@ static void make_histogram(struct perf_ftrace *ftrace, int buckets[],
                        if (num > 0) // 1st entry: [ 1 unit .. bucket_range units ]
                                i = num / ftrace->bucket_range + 1;
                        if (num >= max_latency - min_latency)
-                               i = NUM_BUCKET -1;
+                               i = bucket_num -1;
                }
-               if (i >= NUM_BUCKET)
-                       i = NUM_BUCKET - 1;
+               if ((unsigned)i >= bucket_num)
+                       i = bucket_num - 1;
 
                num += min_latency;
 do_inc:
@@ -820,13 +821,14 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
 {
        int min_latency = ftrace->min_latency;
        bool use_nsec = ftrace->use_nsec;
-       int i;
+       unsigned int bucket_num = ftrace->bucket_num;
+       unsigned int i;
        int total = 0;
        int bar_total = 46;  /* to fit in 80 column */
        char bar[] = "###############################################";
        int bar_len;
 
-       for (i = 0; i < NUM_BUCKET; i++)
+       for (i = 0; i < bucket_num; i++)
                total += buckets[i];
 
        if (total == 0) {
@@ -843,7 +845,7 @@ static void display_histogram(struct perf_ftrace *ftrace, int buckets[])
               0, min_latency ?: 1, use_nsec ? "ns" : "us",
               buckets[0], bar_len, bar, bar_total - bar_len, "");
 
-       for (i = 1; i < NUM_BUCKET - 1; i++) {
+       for (i = 1; i < bucket_num - 1; i++) {
                unsigned int start, stop;
                const char *unit = use_nsec ? "ns" : "us";
 
@@ -881,11 +883,11 @@ print_bucket_info:
                       bar_total - bar_len, "");
        }
 
-       bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
+       bar_len = buckets[bucket_num - 1] * bar_total / total;
        if (!ftrace->bucket_range) {
                printf("  %4d - %-4s %s", 1, "...", use_nsec ? "ms" : "s ");
        } else {
-               unsigned int upper_outlier = (NUM_BUCKET - 2) * ftrace->bucket_range + min_latency;
+               unsigned int upper_outlier = (bucket_num - 2) * ftrace->bucket_range + min_latency;
                if (upper_outlier > ftrace->max_latency)
                        upper_outlier = ftrace->max_latency;
 
@@ -897,7 +899,7 @@ print_bucket_info:
                        printf("  %4d - %4s %s", upper_outlier, "...", use_nsec ? "ns" : "us");
                }
        }
-       printf(" | %10d | %.*s%*s |\n", buckets[NUM_BUCKET - 1],
+       printf(" | %10d | %.*s%*s |\n", buckets[bucket_num - 1],
               bar_len, bar, bar_total - bar_len, "");
 
        printf("\n# statistics  (in %s)\n", ftrace->use_nsec ? "nsec" : "usec");
@@ -997,7 +999,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
        struct pollfd pollfd = {
                .events = POLLIN,
        };
-       int buckets[NUM_BUCKET] = { };
+       int *buckets;
 
        trace_fd = prepare_func_latency(ftrace);
        if (trace_fd < 0)
@@ -1011,6 +1013,12 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 
        evlist__start_workload(ftrace->evlist);
 
+       buckets = calloc(ftrace->bucket_num, sizeof(*buckets));
+       if (buckets == NULL) {
+               pr_err("failed to allocate memory for the buckets\n");
+               goto out;
+       }
+
        line[0] = '\0';
        while (!done) {
                if (poll(&pollfd, 1, -1) < 0)
@@ -1030,7 +1038,7 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
        if (workload_exec_errno) {
                const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
                pr_err("workload failed: %s\n", emsg);
-               goto out;
+               goto out_free_buckets;
        }
 
        /* read remaining buffer contents */
@@ -1045,6 +1053,8 @@ static int __cmd_latency(struct perf_ftrace *ftrace)
 
        display_histogram(ftrace, buckets);
 
+out_free_buckets:
+       free(buckets);
 out:
        close(trace_fd);
        cleanup_func_latency(ftrace);
@@ -1634,7 +1644,7 @@ int cmd_ftrace(int argc, const char **argv)
        OPT_UINTEGER(0, "min-latency", &ftrace.min_latency,
                    "Minimum latency (1st bucket). Works only with --bucket-range."),
        OPT_UINTEGER(0, "max-latency", &ftrace.max_latency,
-                   "Maximum latency (last bucket). Works only with --bucket-range and total buckets less than 22."),
+                   "Maximum latency (last bucket). Works only with --bucket-range."),
        OPT_PARENT(common_options),
        };
        const struct option profile_options[] = {
@@ -1751,10 +1761,25 @@ int cmd_ftrace(int argc, const char **argv)
                        ret = -EINVAL;
                        goto out_delete_filters;
                }
-               if (ftrace.bucket_range && !ftrace.max_latency) {
-                       /* default max latency should depend on bucket range and num_buckets */
-                       ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
-                                               ftrace.min_latency;
+               if (ftrace.bucket_range && ftrace.max_latency &&
+                   ftrace.max_latency < ftrace.min_latency + ftrace.bucket_range) {
+                       /* we need at least 1 bucket excluding min and max buckets */
+                       pr_err("--max-latency must be larger than min-latency + bucket-range\n");
+                       parse_options_usage(ftrace_usage, options,
+                                           "max-latency", /*short_opt=*/false);
+                       ret = -EINVAL;
+                       goto out_delete_filters;
+               }
+               /* set default unless max_latency is set and valid */
+               ftrace.bucket_num = NUM_BUCKET;
+               if (ftrace.bucket_range) {
+                       if (ftrace.max_latency)
+                               ftrace.bucket_num = (ftrace.max_latency - ftrace.min_latency) /
+                                                       ftrace.bucket_range + 2;
+                       else
+                               /* default max latency should depend on bucket range and num_buckets */
+                               ftrace.max_latency = (NUM_BUCKET - 2) * ftrace.bucket_range +
+                                                       ftrace.min_latency;
                }
                cmd_func = __cmd_latency;
                break;
index 25fc280e414ac5fe2b67d66c2284c0d4e5b93a91..51f407a782d6c58af06f6deb09afe9de227ac2db 100644 (file)
@@ -39,6 +39,10 @@ int perf_ftrace__latency_prepare_bpf(struct perf_ftrace *ftrace)
 
        skel->rodata->bucket_range = ftrace->bucket_range;
        skel->rodata->min_latency = ftrace->min_latency;
+       skel->rodata->bucket_num = ftrace->bucket_num;
+       if (ftrace->bucket_range && ftrace->bucket_num) {
+               bpf_map__set_max_entries(skel->maps.latency, ftrace->bucket_num);
+       }
 
        /* don't need to set cpu filter for system-wide mode */
        if (ftrace->target.cpu_list) {
@@ -138,7 +142,7 @@ int perf_ftrace__latency_read_bpf(struct perf_ftrace *ftrace __maybe_unused,
        if (hist == NULL)
                return -ENOMEM;
 
-       for (idx = 0; idx < NUM_BUCKET; idx++) {
+       for (idx = 0; idx < skel->rodata->bucket_num; idx++) {
                err = bpf_map_lookup_elem(fd, &idx, hist);
                if (err) {
                        buckets[idx] = 0;
index fb144811b34fc72a29abc49932f87c43ae2ce61a..09e70d40a0f4d8550a53d54c86eba156f388b8f4 100644 (file)
@@ -50,6 +50,7 @@ const volatile int use_nsec = 0;
 const volatile unsigned int bucket_range;
 const volatile unsigned int min_latency;
 const volatile unsigned int max_latency;
+const volatile unsigned int bucket_num = NUM_BUCKET;
 
 SEC("kprobe/func")
 int BPF_PROG(func_begin)
@@ -124,16 +125,16 @@ int BPF_PROG(func_end)
                        if (delta > 0) { // 1st entry: [ 1 unit .. bucket_range units )
                                // clang 12 doesn't like s64 / u32 division
                                key = (__u64)delta / bucket_range + 1;
-                               if (key >= NUM_BUCKET ||
+                               if (key >= bucket_num ||
                                        delta >= max_latency - min_latency)
-                                       key = NUM_BUCKET - 1;
+                                       key = bucket_num - 1;
                        }
 
                        delta += min_latency;
                        goto do_lookup;
                }
                // calculate index using delta
-               for (key = 0; key < (NUM_BUCKET - 1); key++) {
+               for (key = 0; key < (bucket_num - 1); key++) {
                        if (delta < (cmp_base << key))
                                break;
                }
index 5dee2caba0fe403f98d95c35149725259d0c9aab..395f97b203ead4fbf03eca490201e570990acc21 100644 (file)
@@ -24,6 +24,7 @@ struct perf_ftrace {
        unsigned int            bucket_range;
        unsigned int            min_latency;
        unsigned int            max_latency;
+       unsigned int            bucket_num;
        int                     graph_depth;
        int                     func_stack_trace;
        int                     func_irq_info;