extern struct argp bench_hashmap_lookup_argp;
 extern struct argp bench_local_storage_create_argp;
 extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;
 
 static const struct argp_child bench_parsers[] = {
        { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
        { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
        { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
        { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+       { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
        {},
 };
 
 extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
 
+/* batched, staying mostly in-kernel benchmarks */
+extern const struct bench bench_trig_kernel_count;
+extern const struct bench bench_trig_kprobe_batch;
+extern const struct bench bench_trig_kretprobe_batch;
+extern const struct bench bench_trig_kprobe_multi_batch;
+extern const struct bench bench_trig_kretprobe_multi_batch;
+extern const struct bench bench_trig_fentry_batch;
+extern const struct bench bench_trig_fexit_batch;
+
 /* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
 extern const struct bench bench_trig_uretprobe_nop;
        &bench_rename_fexit,
        /* pure counting benchmarks for establishing theoretical limits */
        &bench_trig_usermode_count,
-       &bench_trig_base,
+       &bench_trig_kernel_count,
        /* syscall-driven triggering benchmarks */
        &bench_trig_tp,
        &bench_trig_rawtp,
        &bench_trig_fexit,
        &bench_trig_fentry_sleep,
        &bench_trig_fmodret,
+       /* batched, staying mostly in-kernel triggers */
+       &bench_trig_kprobe_batch,
+       &bench_trig_kretprobe_batch,
+       &bench_trig_kprobe_multi_batch,
+       &bench_trig_kretprobe_multi_batch,
+       &bench_trig_fentry_batch,
+       &bench_trig_fexit_batch,
        /* uprobes */
        &bench_trig_uprobe_nop,
        &bench_trig_uretprobe_nop,
        &bench_trig_uretprobe_push,
        &bench_trig_uprobe_ret,
        &bench_trig_uretprobe_ret,
+       /* ringbuf/perfbuf benchmarks */
        &bench_rb_libbpf,
        &bench_rb_custom,
        &bench_pb_libbpf,
 
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #define _GNU_SOURCE
+#include <argp.h>
 #include <unistd.h>
+#include <stdint.h>
 #include "bench.h"
 #include "trigger_bench.skel.h"
 #include "trace_helpers.h"
 
+#define MAX_TRIG_BATCH_ITERS 1000
+
+static struct {
+       __u32 batch_iters;
+} args = {
+       .batch_iters = 100,
+};
+
+enum {
+       ARG_TRIG_BATCH_ITERS = 7000,
+};
+
+static const struct argp_option opts[] = {
+       { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
+               "Number of in-kernel iterations per one driver test run"},
+       {},
+};
+
+/* argp callback for --trig-batch-iters; stores the validated value in
+ * args.batch_iters. Returns 0 on success, ARGP_ERR_UNKNOWN for keys we
+ * don't own; argp_usage() does not return on invalid input.
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+       char *end;
+       long ret;
+
+       switch (key) {
+       case ARG_TRIG_BATCH_ITERS:
+               /* use endptr so trailing junk (e.g. "10x") is rejected;
+                * overflow clamps strtol() to LONG_MIN/LONG_MAX, both outside
+                * [1, MAX_TRIG_BATCH_ITERS], so the range check below also
+                * catches out-of-range input without consulting errno
+                */
+               ret = strtol(arg, &end, 10);
+               if (end == arg || *end != '\0' ||
+                   ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
+                       fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
+                               1, MAX_TRIG_BATCH_ITERS);
+                       argp_usage(state);
+               }
+               args.batch_iters = ret;
+               break;
+       default:
+               return ARGP_ERR_UNKNOWN;
+       }
+
+       return 0;
+}
+
+/* Sub-parser registered in bench.c's bench_parsers[] and set as .argp on
+ * every batched trigger benchmark, so --trig-batch-iters is recognized.
+ */
+const struct argp bench_trigger_batch_argp = {
+       .options = opts,
+       .parser = parse_arg,
+};
+
 /* adjust slot shift in inc_hits() if changing */
 #define MAX_BUCKETS 256
 
 static struct trigger_ctx {
        struct trigger_bench *skel;
        bool usermode_counters;
+       /* 0 (unset) unless a setup overrides the default driver program;
+        * see trigger_kernel_count_setup() and trigger_producer_batch()
+        */
+       int driver_prog_fd;
 } ctx;
 
 static struct counter base_hits[MAX_BUCKETS];
        return NULL;
 }
 
+/* Producer thread for batched benchmarks: each bpf_prog_test_run_opts()
+ * call executes the driver BPF program, which loops batch_iters times
+ * in the kernel, keeping most of the work kernel-side.
+ */
+static void *trigger_producer_batch(void *input)
+{
+       /* ctx.driver_prog_fd is only set by trigger_kernel_count_setup() to
+        * substitute the self-counting trigger_count program; otherwise use
+        * the generic trigger_driver program from the skeleton
+        */
+       int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
+
+       while (true)
+               bpf_prog_test_run_opts(fd, NULL);
+
+       return NULL;
+}
+
 static void trigger_measure(struct bench_res *res)
 {
        if (ctx.usermode_counters)
 
 static void setup_ctx(void)
 {
+       int err;
+
        setup_libbpf();
 
-       ctx.skel = trigger_bench__open_and_load();
+       ctx.skel = trigger_bench__open();
        if (!ctx.skel) {
                fprintf(stderr, "failed to open skeleton\n");
                exit(1);
        }
+
+       ctx.skel->rodata->batch_iters = args.batch_iters;
+
+       err = trigger_bench__load(ctx.skel);
+       if (err) {
+               fprintf(stderr, "failed to open skeleton\n");
+               exit(1);
+       }
 }
 
 static void attach_bpf(struct bpf_program *prog)
        attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
 }
 
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
+{
+       setup_ctx();
+       /* override driver program: nothing is attached, the batch loop itself
+        * does the counting (inc_counter() per iteration), establishing the
+        * in-kernel baseline with no probe/fentry overhead on top
+        */
+       ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+}
+
+/* Each setup below attaches one counting program to bpf_get_numa_node_id(),
+ * the helper the trigger_driver BPF program calls batch_iters times per
+ * bpf_prog_test_run_opts() invocation.
+ */
+static void trigger_kprobe_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_kprobe_batch);
+}
+
+static void trigger_kretprobe_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_batch);
+}
+
+static void trigger_kprobe_multi_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi_batch);
+}
+
+static void trigger_kretprobe_multi_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi_batch);
+}
+
+static void trigger_fentry_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_fentry_batch);
+}
+
+static void trigger_fexit_batch_setup(void)
+{
+       setup_ctx();
+       attach_bpf(ctx.skel->progs.bench_trigger_fexit_batch);
+}
+
 /* make sure call is not inlined and not avoided by compiler, so __weak and
  * inline asm volatile in the body of the function
  *
        .report_final = hits_drops_report_final,
 };
 
+/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
+/* Expands to bench_trig_<KIND> named "trig-<NAME>", wired to
+ * trigger_<KIND>_setup(); all share the batched producer thread and the
+ * --trig-batch-iters argp sub-parser defined above.
+ */
+#define BENCH_TRIG_BATCH(KIND, NAME)                                   \
+const struct bench bench_trig_##KIND = {                               \
+       .name = "trig-" NAME,                                           \
+       .setup = trigger_##KIND##_setup,                                \
+       .producer_thread = trigger_producer_batch,                      \
+       .measure = trigger_measure,                                     \
+       .report_progress = hits_drops_report_progress,                  \
+       .report_final = hits_drops_report_final,                        \
+       .argp = &bench_trigger_batch_argp,                              \
+}
+
+BENCH_TRIG_BATCH(kernel_count, "kernel-count");
+BENCH_TRIG_BATCH(kprobe_batch, "kprobe-batch");
+BENCH_TRIG_BATCH(kretprobe_batch, "kretprobe-batch");
+BENCH_TRIG_BATCH(kprobe_multi_batch, "kprobe-multi-batch");
+BENCH_TRIG_BATCH(kretprobe_multi_batch, "kretprobe-multi-batch");
+BENCH_TRIG_BATCH(fentry_batch, "fentry-batch");
+BENCH_TRIG_BATCH(fexit_batch, "fexit-batch");
+
 /* uprobe benchmarks */
 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME)                      \
 const struct bench bench_trig_##KIND = {                               \
 
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Facebook
-
 #include <linux/bpf.h>
 #include <asm/unistd.h>
 #include <bpf/bpf_helpers.h>
        inc_counter();
        return 0;
 }
+
+/* Set by userspace (benchs/bench_trigger.c setup_ctx()) before skeleton
+ * load: number of in-kernel loop iterations per one test-run call.
+ */
+const volatile int batch_iters = 0;
+
+/* Baseline driver: counts by itself with nothing attached, measuring the
+ * pure in-kernel counting limit.
+ */
+SEC("raw_tp")
+int trigger_count(void *ctx)
+{
+       int i;
+
+       for (i = 0; i < batch_iters; i++)
+               inc_counter();
+
+       return 0;
+}
+
+/* Generic driver: calls bpf_get_numa_node_id() batch_iters times so that
+ * whichever benchmark program is attached to it fires once per iteration.
+ */
+SEC("raw_tp")
+int trigger_driver(void *ctx)
+{
+       int i;
+
+       for (i = 0; i < batch_iters; i++)
+               (void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
+       return 0;
+}
+
+/* One counting program per attach mechanism (kprobe, kretprobe,
+ * kprobe.multi, kretprobe.multi, fentry, fexit), all hooked to the
+ * bpf_get_numa_node_id() calls made by trigger_driver above.
+ */
+SEC("kprobe/bpf_get_numa_node_id")
+int bench_trigger_kprobe_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}
+
+SEC("kretprobe/bpf_get_numa_node_id")
+int bench_trigger_kretprobe_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}
+
+SEC("kprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kprobe_multi_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}
+
+SEC("kretprobe.multi/bpf_get_numa_node_id")
+int bench_trigger_kretprobe_multi_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}
+
+SEC("fentry/bpf_get_numa_node_id")
+int bench_trigger_fentry_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}
+
+SEC("fexit/bpf_get_numa_node_id")
+int bench_trigger_fexit_batch(void *ctx)
+{
+       inc_counter();
+       return 0;
+}