--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
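+
+/*
+ * Example invocations (via the selftests bench runner; option names are
+ * declared in opts[] below, values here are illustrative):
+ *
+ *   ./bench lpm-trie-lookup --nr_entries 4096 --random
+ *   ./bench lpm-trie-insert --nr_entries 4096
+ */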
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
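+
+/*
+ * nr_entries defaults to 0 so that validate_common() can insist on an
+ * explicit --nr_entries flag.
+ */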
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.\n");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ /* trie_key carries 32 bits of key data, so cap the prefix length there */
+ if (ret < 1 || ret > 32) {
+ fprintf(stderr, "Invalid prefix_len value (must be 1-32).\n");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
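+ /* A prefix of PREFIX_LEN bits distinguishes at most 2^PREFIX_LEN keys */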
+ if ((1ULL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
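+/*
+ * Key/value arrays for batch-filling and batch-draining the trie.
+ * struct trie_key (from progs/lpm_trie.h) pairs a prefix length with a
+ * 32-bit data word, as the LPM trie map expects.
+ */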
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ __u32 nr = args.nr_entries;
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ /* The count argument is in/out, so pass a copy to keep args intact */
+ err = bpf_map_update_batch(map_fd, keys, vals, &nr, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ __u32 nr = args.nr_entries;
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ /* As in fill_map(), the count argument is in/out */
+ err = bpf_map_delete_batch(map_fd, keys, &nr, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+ if (!keys || !vals) {
+ fprintf(stderr, "failed to allocate keys/vals arrays\n");
+ exit(1);
+ }
+
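+ /*
+ * Dense keys in [0, nr_entries) with a uniform prefix length; see
+ * the comment at the top of this file for why this models the
+ * worst case.
+ */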
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
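+/*
+ * Called once per measurement interval by the bench harness: snapshot
+ * and reset the counters accumulated on the BPF side.
+ */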
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int fd, err;
+ char in[ETH_HLEN]; /* contents unused; XDP test runs need >= ETH_HLEN bytes */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = in,
+ .data_size_in = sizeof(in),
+ .repeat = 1,
+ );
+
+ /* The program fd is stable, so fetch it once outside the loop */
+ fd = bpf_program__fd(ctx.bench->progs.run_bench);
+
+ while (true) {
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
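+/*
+ * FREE doesn't go through run_bench(): each iteration loads a fresh
+ * skeleton, fills its trie, and destroys it. The fentry/fexit probes on
+ * bpf_map_free_deferred() in the BPF object time the actual free.
+ */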
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval, but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ /* Cast to double to avoid truncating integer division */
+ latency += (double)res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* The noop bench measures pure harness overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
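+
+/*
+ * MAX_ENTRIES is just headroom for the map definition below; each test
+ * run performs at most NR_LOOPS operations via bpf_loop() in run_bench().
+ */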
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
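+
+/*
+ * Note: the FREE benchmark uses a separate trie (trie_free_map, from the
+ * lpm_trie_map skeleton) so that the map itself can be created and
+ * destroyed on every iteration.
+ */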
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
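+/*
+ * Trie free latency is measured by bracketing bpf_map_free_deferred():
+ * the fentry probe stamps a start time when our trie begins tearing
+ * down, and the fexit probe folds the elapsed time into hits/duration_ns.
+ */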
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ blackbox = READ_ONCE(key.data);
+ /* Keep the store alive so the modulo in generate_key() isn't elided */
+ barrier_var(blackbox);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * No atomicity is needed here: DELETE runs with a single
+ * producer, so sequential key generation guarantees that
+ * reaching key nr_entries - 1 means every entry has been
+ * deleted. Detecting completion this way matters because one
+ * bpf_loop() call may not cover the whole trie, e.g. when
+ * NR_LOOPS < nr_entries, so deletion progress has to carry
+ * across invocations of this prog.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
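+ /* bpf_loop() returned the number of completed iterations */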
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}