]> www.infradead.org Git - users/hch/nvme-cli.git/commitdiff
bpf samples
authorChristoph Hellwig <hch@lst.de>
Mon, 9 Nov 2020 09:37:36 +0000 (10:37 +0100)
committerChristoph Hellwig <hch@lst.de>
Thu, 12 Nov 2020 10:13:16 +0000 (11:13 +0100)
bpf-samples/Makefile [new file with mode: 0644]
bpf-samples/find_xfs.c [new file with mode: 0644]
bpf-samples/hello_world.c [new file with mode: 0644]
bpf-samples/merge.c [new file with mode: 0644]
bpf-samples/merge.json [new file with mode: 0644]
bpf-samples/merge.sh [new file with mode: 0755]
bpf-samples/myrocks.h [new file with mode: 0644]
bpf-samples/nvme-bpf-defs.h [new file with mode: 0644]
bpf-samples/nvme-bpf-helpers.h [new file with mode: 0644]
bpf-samples/scan.c [new file with mode: 0644]

diff --git a/bpf-samples/Makefile b/bpf-samples/Makefile
new file mode 100644 (file)
index 0000000..2a162ae
--- /dev/null
@@ -0,0 +1,12 @@
+
+# Build the NVMe eBPF sample programs as BPF objects with clang.
+CC=clang
+CFLAGS=-Wall -Os --target=bpf
+
+OBJS=hello_world.o find_xfs.o scan.o merge.o
+
+# all and clean name actions, not files.
+.PHONY: all clean
+
+all: $(OBJS)
+
+# Every sample includes these headers, directly or through myrocks.h, so a
+# header change has to rebuild all of the objects.
+$(OBJS): nvme-bpf-defs.h nvme-bpf-helpers.h
+
+merge.o: myrocks.h
+
+scan.o: myrocks.h
+
+clean:
+       rm -f *.o
diff --git a/bpf-samples/find_xfs.c b/bpf-samples/find_xfs.c
new file mode 100644 (file)
index 0000000..b9fbe92
--- /dev/null
@@ -0,0 +1,36 @@
+
+#include "nvme-bpf-defs.h"
+#include "nvme-bpf-helpers.h"
+
+/*
+ * The on-disk bytes 'X' 'F' 'S' 'B' (0x58 0x46 0x53 0x42) as they appear when
+ * loaded into a 32-bit integer on a little-endian machine.
+ */
+#define XFS_SB_MAGIC   0x42534658      /* big endian */
+
+/*
+ * Array map with 8 slots of unsigned long. nvme_hello_world() below only
+ * touches slot 0, which it uses as a counter of magic-number matches.
+ */
+struct nvme_bpf_map_def __section("maps") my_map = {
+       .type = BPF_MAP_TYPE_ARRAY,
+       .key_size = sizeof(unsigned int),
+       .value_size = sizeof(unsigned long),
+       .max_entries = 8,
+};
+
+/*
+ * Peek at the first four bytes of the data (offset 0) and compare them with
+ * XFS_SB_MAGIC. On a match, bump the counter in slot 0 of my_map (or store 1
+ * there if the lookup finds nothing) and call the hello-world helper.
+ *
+ * Returns 1 when the peek helper reports a non-zero result, 0 otherwise.
+ */
+__section("nvme")
+int nvme_hello_world(struct nvme_bpf_ctx *ctx)
+{
+       unsigned int slot = 0;
+       unsigned long first_hit = 1;
+       unsigned long *counter;
+       unsigned int magic;
+
+       if (bpf_nvme_peek_data(ctx, &magic, sizeof(magic), 0) != 0)
+               return 1;
+
+       /* Nothing to record for data that does not carry the magic. */
+       if (magic != XFS_SB_MAGIC)
+               return 0;
+
+       counter = bpf_map_lookup_elem(&my_map, &slot);
+       if (!counter)
+               bpf_map_update_elem(&my_map, &slot, &first_hit, 0);
+       else
+               *counter += 1;
+
+       bpf_nvme_hello_world();
+       return 0;
+}
diff --git a/bpf-samples/hello_world.c b/bpf-samples/hello_world.c
new file mode 100644 (file)
index 0000000..4f4ab9f
--- /dev/null
@@ -0,0 +1,10 @@
+
+#include "nvme-bpf-defs.h"
+#include "nvme-bpf-helpers.h"
+
+/*
+ * Minimal sample program: call the hello-world helper and return 0.
+ * The context argument is not used.
+ */
+__section("nvme")
+int nvme_hello_world(void *ctx)
+{
+       bpf_nvme_hello_world();
+       return 0;
+}
diff --git a/bpf-samples/merge.c b/bpf-samples/merge.c
new file mode 100644 (file)
index 0000000..5ffb5b5
--- /dev/null
@@ -0,0 +1,140 @@
+
+#include "myrocks.h"
+
+/*
+ * myrocks.h derives MAX_KEYS_PER_PAGE from a 4096-byte page and MIN_KEY_SIZE;
+ * this program replaces it with a smaller per-invocation bound, maxed out by
+ * the instruction limit.
+ */
+#undef MAX_KEYS_PER_PAGE
+#define MAX_KEYS_PER_PAGE       120
+
+/* Number of entries in nr_keys_map and curr_key_map. */
+#define NR_SST         5
+/* Base for sizing l1_map..l4_map, which hold MAX_ENTRIES / 10^n entries. */
+#define MAX_ENTRIES    20000
+
+/*
+ * One uint32_t counter per slot; scan() looks it up with index nsid - 1 and
+ * hands the counter to read_keys_into_map() or merge_keys().
+ */
+struct nvme_bpf_map_def __section("maps") nr_keys_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(uint32_t),
+       .max_entries    = NR_SST,
+};
+
+/*
+ * One uint32_t cursor per slot; merge_key() uses slots 0..3 as the current
+ * position inside l1_map..l4_map respectively.
+ */
+struct nvme_bpf_map_def __section("maps") curr_key_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(uint32_t),
+       .max_entries    = NR_SST,
+};
+
+/*
+ * l1_map..l4_map store decoded struct block_data records, filled by
+ * read_keys_into_map() and consulted by merge_key(). Each map is ten times
+ * larger than the previous one: 2, 20, 200 and 2000 entries.
+ */
+struct nvme_bpf_map_def __section("maps") l1_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(struct block_data),
+       .max_entries    = MAX_ENTRIES / 10000,
+};
+
+struct nvme_bpf_map_def __section("maps") l2_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(struct block_data),
+       .max_entries    = MAX_ENTRIES / 1000,
+};
+
+struct nvme_bpf_map_def __section("maps") l3_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(struct block_data),
+       .max_entries    = MAX_ENTRIES / 100,
+};
+struct nvme_bpf_map_def __section("maps") l4_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(struct block_data),
+       .max_entries    = MAX_ENTRIES / 10,
+};
+
+/*
+ * Decode up to MAX_KEYS_PER_PAGE records from the data stream directly into
+ * consecutive entries of @map. *@nr_keys is the index of the next entry to
+ * fill and is advanced after every successful read.
+ *
+ * Returns 1 as soon as @map has no entry at the current index or read_block()
+ * fails, and 0 only when all MAX_KEYS_PER_PAGE reads succeed.
+ */
+static __always_inline int read_keys_into_map(struct nvme_bpf_ctx *ctx,
+               struct nvme_bpf_map_def *map, uint32_t *nr_keys)
+{
+       uint32_t k;
+
+       for (k = 0; k < MAX_KEYS_PER_PAGE; k++) {
+               struct block_data *blk;
+               uint32_t idx = *nr_keys;
+
+               blk = bpf_map_lookup_elem(map, &idx);
+               if (!blk)
+                       return 1;
+               /* NULL: this program keeps no I/O time statistics */
+               if (!read_block(ctx, blk, idx, NULL))
+                       return 1;
+               (*nr_keys)++;
+       }
+
+       return 0;
+}
+
+/*
+ * Compare @blk with the entry of @map that the cursor stored in slot @idx of
+ * curr_key_map points at. On a key match the cursor is advanced, *@merged is
+ * set, and *@nr_keys is incremented unless an earlier call already set
+ * *@merged.
+ *
+ * Returns false only when curr_key_map has no entry for @idx. A missing @map
+ * entry at the cursor position or a key mismatch still returns true.
+ */
+static __always_inline bool merge_key(struct block_data *blk, int idx,
+               struct nvme_bpf_map_def *map, uint32_t *nr_keys, bool *merged)
+{
+       struct block_data *b;
+       uint32_t *curr_key, ck;
+
+       curr_key = bpf_map_lookup_elem(&curr_key_map, &idx);
+       if (!curr_key)
+               return false;
+       /* copy the cursor so its address can be passed as the lookup key */
+       ck = *curr_key;
+
+       b = bpf_map_lookup_elem(map, &ck);
+       if (b && b->key == blk->key) {
+               /* count the record once, no matter how many levels match */
+               if (!*merged)
+                       (*nr_keys)++;
+               *merged = true;
+               (*curr_key)++;
+       }
+
+       return true;
+}
+
+/*
+ * Decode up to MAX_KEYS_PER_PAGE records from the data stream and compare
+ * each one against the current position of l1_map..l4_map via merge_key().
+ * *@nr_keys is incremented exactly once per decoded record: either by the
+ * first matching merge_key() call or by the fallback at the end of the loop
+ * body.
+ *
+ * Returns 1 when read_block() fails and 0 when all MAX_KEYS_PER_PAGE records
+ * were decoded. scan() passes the value on as the program's return code, so
+ * it is an int status like read_keys_into_map() returns, not a truth value.
+ */
+static __always_inline int merge_keys(struct nvme_bpf_ctx *ctx,
+               uint32_t *nr_keys)
+{
+       struct block_data blk;
+       uint32_t k;
+
+       for (k = 0; k < MAX_KEYS_PER_PAGE; k++) {
+               bool merged = false;
+
+               if (!read_block(ctx, &blk, *nr_keys, NULL))
+                       return 1;
+
+               /* merge_key() results are deliberately not checked here */
+               merge_key(&blk, 0, &l1_map, nr_keys, &merged);
+               merge_key(&blk, 1, &l2_map, nr_keys, &merged);
+               merge_key(&blk, 2, &l3_map, nr_keys, &merged);
+               merge_key(&blk, 3, &l4_map, nr_keys, &merged);
+
+               if (!merged)
+                       (*nr_keys)++;
+       }
+
+       return 0;
+}
+
+/*
+ * Program entry point. The namespace id selects the role of the data being
+ * processed: nsid 1..4 load records into l1_map..l4_map, any other nsid that
+ * has an nr_keys_map entry is merged against those maps.
+ *
+ * Returns 1 when nr_keys_map has no entry for nsid - 1, otherwise the result
+ * of read_keys_into_map() or merge_keys().
+ */
+__section("nvme")
+int scan(struct nvme_bpf_ctx *ctx)
+{
+       uint32_t idx = ctx->nsid - 1;
+       uint32_t *nr_keys;
+
+       nr_keys = bpf_map_lookup_elem(&nr_keys_map, &idx);
+       if (!nr_keys)
+               return 1;
+
+       switch (idx) {
+       case 0:
+               return read_keys_into_map(ctx, &l1_map, nr_keys);
+       case 1:
+               return read_keys_into_map(ctx, &l2_map, nr_keys);
+       case 2:
+               return read_keys_into_map(ctx, &l3_map, nr_keys);
+       case 3:
+               return read_keys_into_map(ctx, &l4_map, nr_keys);
+       default:
+               return merge_keys(ctx, nr_keys);
+       }
+}
diff --git a/bpf-samples/merge.json b/bpf-samples/merge.json
new file mode 100644 (file)
index 0000000..aefc06b
--- /dev/null
@@ -0,0 +1,76 @@
+{
+  "hosts": [
+    {
+      "nqn": "hostnqn"
+    }
+  ], 
+  "ports": [
+    {
+      "addr": {
+        "adrfam": "ipv4", 
+        "traddr": "192.168.7.68", 
+        "treq": "not specified", 
+        "trsvcid": "4420", 
+        "trtype": "loop"
+      }, 
+      "portid": 1, 
+      "referrals": [], 
+      "subsystems": [
+        "testnqn"
+      ]
+    }
+  ], 
+  "subsystems": [
+    {
+      "allowed_hosts": [
+        "hostnqn"
+      ], 
+      "attr": {
+        "allow_any_host": "0"
+      }, 
+      "namespaces": [
+        {
+          "device": {
+            "nguid": "ef90689c-6c46-d44c-89c1-4067801309a8", 
+            "path": "/dev/loop0"
+          }, 
+          "enable": 1, 
+          "nsid": 1
+        },
+        {
+          "device": {
+            "nguid": "ef90689c-6c46-d44c-89c1-4067801309a7", 
+            "path": "/dev/loop1"
+          }, 
+          "enable": 1, 
+          "nsid": 2
+        },
+        {
+          "device": {
+            "nguid": "ef90689c-6c46-d44c-89c1-4067801309a6", 
+            "path": "/dev/loop2"
+          }, 
+          "enable": 1, 
+          "nsid": 3
+        },
+        {
+          "device": {
+            "nguid": "ef90689c-6c46-d44c-89c1-4067801309a5", 
+            "path": "/dev/loop3"
+          }, 
+          "enable": 1, 
+          "nsid": 4
+        },
+        {
+          "device": {
+            "nguid": "ef90695c-6c46-d44c-89c1-4067801309a1", 
+            "path": "/dev/loop4"
+          }, 
+          "enable": 1, 
+          "nsid": 5
+        }
+      ], 
+      "nqn": "testnqn"
+    }
+  ]
+}
diff --git a/bpf-samples/merge.sh b/bpf-samples/merge.sh
new file mode 100755 (executable)
index 0000000..dd0168a
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+#
+# To prepare do a:
+#
+# truncate --size +512 $FILE
+#
+# for each l?.sst file to ensure that all data can be read through the block
+# device.
+#
+
+set -e
+# Without pipefail the exit status of "nvme bpf load-program ... | tee" below
+# is that of tee, so a failed program load would go unnoticed.
+set -o pipefail
+set +x
+
+TABLE_DIR=~/table_dir
+
+NVMETCLI=~/nvmetcli/nvmetcli
+
+NVME_CLI_DIR=~/nvme-cli
+NVME=${NVME_CLI_DIR}/nvme
+MERGE_PROG=${NVME_CLI_DIR}/bpf-samples/merge.o
+NVMET_CONFIG=${NVME_CLI_DIR}/bpf-samples/merge.json
+
+NVME_DEV="/dev/nvme0"
+SLOT="0"
+MAPS="/tmp/nvme-bpf-maps"
+
+
+losetup /dev/loop0 ${TABLE_DIR}/l1.sst
+losetup /dev/loop1 ${TABLE_DIR}/l2.sst
+losetup /dev/loop2 ${TABLE_DIR}/l3.sst
+losetup /dev/loop3 ${TABLE_DIR}/l4.sst
+losetup /dev/loop4 ${TABLE_DIR}/l5.sst
+
+${NVMETCLI} restore ${NVMET_CONFIG}
+echo "transport=loop,hostnqn=hostnqn,nqn=testnqn" > /dev/nvme-fabrics
+
+sleep 2
+
+${NVME} list
+
+${NVME} bpf load-program -p ${MERGE_PROG} -s ${SLOT} ${NVME_DEV} | tee ${MAPS}
+
+function verify
+{
+       dev=$1
+
+       size=$(blockdev --getsize $dev)
+        ${NVME} verify ${dev} -s 0 -c $((size - 1)) \
+               --dir-type=2 --dir-spec=${SLOT}
+}
+
+echo
+echo "Running eBPF programs..."
+verify ${NVME_DEV}n1
+verify ${NVME_DEV}n2
+verify ${NVME_DEV}n3
+verify ${NVME_DEV}n4
+verify ${NVME_DEV}n5
+echo "Done running eBPF programs."
+echo
+
+function lookup_elem
+{
+       map=$(grep nr_keys_map ${MAPS} | awk '{print $2}')
+       key=$1
+
+       ${NVME} bpf lookup-elem -m ${map} -k ${key} ${NVME_DEV}
+}
+
+echo -n "Total Keys Processed: "
+lookup_elem 4
+
+echo -n "L1: "
+lookup_elem 0
+
+echo -n "L2: "
+lookup_elem 1
+
+echo -n "L3: "
+lookup_elem 2
+
+echo -n "L4: "
+lookup_elem 3
diff --git a/bpf-samples/myrocks.h b/bpf-samples/myrocks.h
new file mode 100644 (file)
index 0000000..3a5562c
--- /dev/null
@@ -0,0 +1,133 @@
+
+#include "nvme-bpf-defs.h"
+#include "nvme-bpf-helpers.h"
+
+/*
+ * One decoded record, as filled in by read_block(). The member order here is
+ * not the order in which read_block() decodes the fields from the stream.
+ */
+struct block_data {
+       uint32_t shared_bytes;          /* decoded as varint32 */
+       uint32_t unshared_bytes;        /* decoded as varint32 */
+       uint32_t value_length;          /* varint32; bytes skipped after the record */
+       uint8_t type;                   /* single byte */
+       uint32_t key;                   /* 4 bytes; 0 is treated as "no more data" */
+       uint64_t myrocks_meta;          /* 7 bytes */
+       uint64_t seq_id;                /* 7 bytes */
+};
+
+/*
+ * Add the time elapsed since @start_time to *@io_time.
+ * Does nothing (and takes no timestamp) when @io_time is NULL.
+ */
+static __always_inline void
+update_io_time(unsigned long *io_time, unsigned long start_time)
+{
+       unsigned long elapsed;
+
+       if (!io_time)
+               return;
+
+       elapsed = bpf_nvme_ktime_ns() - start_time;
+       *io_time += elapsed;
+}
+
+/*
+ * Decode one byte of a variable-length integer.
+ *
+ * If *@done is already set this is a no-op that reports success, which lets
+ * decode_varint32() chain a fixed number of calls. Otherwise one byte is read
+ * from the stream, its low seven bits are OR-ed into *@out at bit position
+ * @shift, and *@done is set when the byte's top bit is clear (last byte).
+ * The time spent is added to *@io_time when @io_time is not NULL.
+ *
+ * Returns false when the read helper returns 0, true otherwise.
+ */
+static __always_inline bool
+decode_varint_byte(struct nvme_bpf_ctx *ctx, uint32_t *out, uint8_t shift,
+               bool *done, unsigned long *io_time)
+{
+       unsigned long start_time;
+       uint8_t data;
+
+       if (*done)
+               return true;
+
+       /* Only take a timestamp when a byte is actually going to be read. */
+       start_time = bpf_nvme_ktime_ns();
+       if (!bpf_nvme_read_data(ctx, &data, sizeof(data)))
+               return false;
+
+       *out |= (((uint32_t)data & 0x7F) << shift);
+       if (!(data & 0x80))
+               *done = true;
+       update_io_time(io_time, start_time);
+       return true;
+}
+
+/*
+ * Decode a variable-length 32-bit integer of at most five bytes into *@out,
+ * seven payload bits per byte, least significant group first.
+ *
+ * The calls are chained with && so that a failed read stops the decode; once
+ * a byte without the continuation bit was seen the remaining calls are
+ * no-ops. Returns false when a read fails, true otherwise.
+ */
+static __always_inline bool
+decode_varint32(struct nvme_bpf_ctx *ctx, uint32_t *out, unsigned long *io_time)
+{
+       bool done = false;
+
+       *out = 0;
+       return decode_varint_byte(ctx, out, 0, &done, io_time) &&
+               decode_varint_byte(ctx, out, 7, &done, io_time) &&
+               decode_varint_byte(ctx, out, 14, &done, io_time) &&
+               decode_varint_byte(ctx, out, 21, &done, io_time) &&
+               decode_varint_byte(ctx, out, 28, &done, io_time);
+}
+
+/*
+ * Read a single byte from the stream into *@out and account the time spent
+ * in *@io_time (when not NULL).
+ *
+ * Returns false, leaving *@out untouched, when the read helper returns 0.
+ */
+static __always_inline bool
+decode_fixint8(struct nvme_bpf_ctx *ctx, uint8_t *out, unsigned long *io_time)
+{
+       unsigned long t0 = bpf_nvme_ktime_ns();
+       uint8_t byte;
+
+       if (bpf_nvme_read_data(ctx, &byte, sizeof(byte))) {
+               *out = byte;
+               update_io_time(io_time, t0);
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Read four bytes from the stream and combine them into *@out, accounting
+ * the time spent in *@io_time (when not NULL).
+ *
+ * Note that, despite the "be" in the name, the first byte read ends up in the
+ * least significant position of the result.
+ *
+ * Returns false, leaving *@out untouched, when the read helper returns 0.
+ */
+static __always_inline bool
+decode_fixint32be(struct nvme_bpf_ctx *ctx, uint32_t *out,
+               unsigned long *io_time)
+{
+       unsigned long t0 = bpf_nvme_ktime_ns();
+       uint8_t raw[4];
+       uint32_t val;
+
+       if (!bpf_nvme_read_data(ctx, &raw, sizeof(raw)))
+               return false;
+
+       val = raw[0];
+       val |= (uint32_t)raw[1] << 8;
+       val |= (uint32_t)raw[2] << 16;
+       val |= (uint32_t)raw[3] << 24;
+       *out = val;
+
+       update_io_time(io_time, t0);
+       return true;
+}
+
+/*
+ * Read seven bytes from the stream and combine them into the low 56 bits of
+ * *@out, accounting the time spent in *@io_time (when not NULL).
+ *
+ * Note that, despite the "be" in the name, the first byte read ends up in the
+ * least significant position of the result.
+ *
+ * Returns false, leaving *@out untouched, when the read helper returns 0.
+ */
+static __always_inline bool
+decode_fixint56be(struct nvme_bpf_ctx *ctx, uint64_t *out,
+               unsigned long *io_time)
+{
+       unsigned long t0 = bpf_nvme_ktime_ns();
+       uint8_t raw[7];
+       uint64_t val;
+
+       if (!bpf_nvme_read_data(ctx, &raw, sizeof(raw)))
+               return false;
+
+       val = raw[0];
+       val |= (uint64_t)raw[1] << 8;
+       val |= (uint64_t)raw[2] << 16;
+       val |= (uint64_t)raw[3] << 24;
+       val |= (uint64_t)raw[4] << 32;
+       val |= (uint64_t)raw[5] << 40;
+       val |= (uint64_t)raw[6] << 48;
+       *out = val;
+
+       update_io_time(io_time, t0);
+       return true;
+}
+
+/*
+ * Smallest record size assumed when bounding the per-page loops.
+ * NOTE(review): the individual terms are not explained and do not obviously
+ * match the field sizes read_block() decodes - confirm.
+ */
+#define MIN_KEY_SIZE           (4 + 4 + 4 + 1 + 1 + 4 + 1)
+/* 4096 bytes divided by MIN_KEY_SIZE, rounded up. */
+#define MAX_KEYS_PER_PAGE      ((4096 + MIN_KEY_SIZE - 1) / MIN_KEY_SIZE)
+
+/*
+ * Decode the next record from the stream into @blk and skip over its value.
+ *
+ * Fields are consumed in this order: shared_bytes, unshared_bytes and
+ * value_length as varint32, type as one byte, myrocks_meta as 7 bytes, key as
+ * 4 bytes and seq_id as 7 bytes. Afterwards value_length bytes are skipped.
+ *
+ * @nr_keys is the number of records decoded so far. It is only used for end
+ * detection: a zero key is accepted for the very first record and treated as
+ * the end of the data for any later one.
+ * @io_time, when not NULL, accumulates the time spent in the decode helpers.
+ *
+ * Returns true when a record was decoded, false when a read failed or the
+ * end of the data was detected.
+ */
+static __always_inline bool
+read_block(struct nvme_bpf_ctx *ctx, struct block_data *blk, uint32_t nr_keys,
+       unsigned long *io_time)
+{
+       /* start from a known value in case decoding stops before the key */
+       blk->key = 0;
+
+       if (decode_varint32(ctx, &blk->shared_bytes, io_time) &&
+           decode_varint32(ctx, &blk->unshared_bytes, io_time) &&
+           decode_varint32(ctx, &blk->value_length, io_time) &&
+           decode_fixint8(ctx, &blk->type, io_time) &&
+           decode_fixint56be(ctx, &blk->myrocks_meta, io_time) &&
+           decode_fixint32be(ctx, &blk->key, io_time) &&
+           decode_fixint56be(ctx, &blk->seq_id, io_time)) {
+               /*
+                * Crudely detect an overrun of the file into zeroed data at the
+                * end of the block.
+                */
+               if (nr_keys && !blk->key)
+                       return false;
+
+               bpf_nvme_skip_data(ctx, blk->value_length);
+               return true;
+       }
+
+       return false;
+}
diff --git a/bpf-samples/nvme-bpf-defs.h b/bpf-samples/nvme-bpf-defs.h
new file mode 100644 (file)
index 0000000..bef1519
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Definitions for use in the NVMe eBPF programs.
+ */
+
+/* Force inlining of the helper functions shared by the sample programs. */
+#define __always_inline                inline __attribute__((__always_inline__))
+
+/*
+ * Local replacements for the fixed-width integer and boolean types, so that
+ * the samples do not include any system header.
+ */
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed short int16_t;
+typedef unsigned short uint16_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef signed long long int64_t;
+typedef unsigned long long uint64_t;
+typedef _Bool bool;
+#define false 0
+#define true 1
+
+#define NULL   ((void *)0)
+
+/* Place a symbol in the ELF section NAME and keep it even if unreferenced. */
+#define __section(NAME)  __attribute__((section(NAME), used))
+
+/*
+ * Description of one map. The samples place instances of this structure in
+ * the "maps" section.
+ */
+struct nvme_bpf_map_def {
+       uint32_t type;          /* one of enum nvme_bpf_map_type */
+       uint32_t key_size;      /* size of a key in bytes */
+       uint32_t value_size;    /* size of a value in bytes */
+       uint32_t max_entries;   /* number of entries */
+       uint32_t map_flags;     /* left zero by all samples */
+};
+
+/* Values for nvme_bpf_map_def.type; the samples only use BPF_MAP_TYPE_ARRAY. */
+enum nvme_bpf_map_type {
+       BPF_MAP_TYPE_UNSPEC,
+       BPF_MAP_TYPE_HASH,
+       BPF_MAP_TYPE_ARRAY,
+       /* more here.. */
+};
+
+/* public context seen by the eBPF programs */
+struct nvme_bpf_ctx {
+       uint32_t                offset; /* byte offset into the NVMe command */
+       uint32_t                size;   /* byte size of the accessed data area */
+       uint32_t                nsid;   /* namespace id; merge.c uses nsid - 1 as a map index */
+};
diff --git a/bpf-samples/nvme-bpf-helpers.h b/bpf-samples/nvme-bpf-helpers.h
new file mode 100644 (file)
index 0000000..10294b6
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+ * Helper ids for the NVMe eBPF programs. Each id reuses the number of the
+ * generic BPF helper named in the trailing comment.
+ *
+ * XXX: This should probably be autogenerated..
+ */
+
+#define BPF_FUNC_nvme_read_data                 5 /* BPF_FUNC_ktime_get_ns */
+#define BPF_FUNC_nvme_peek_data                 6 /* BPF_FUNC_trace_printk */
+#define BPF_FUNC_nvme_skip_data                 8 /* BPF_FUNC_get_smp_processor_id */
+
+#define BPF_FUNC_nvme_hello_world       9 /* BPF_FUNC_skb_store_bytes */
+#define BPF_FUNC_nvme_ktime_ns          42 /* BPF_FUNC_get_numa_node_id */
+
+/*
+ * Map helpers (ids 1 and 2). The samples treat a NULL result of
+ * bpf_map_lookup_elem() as "no entry" and otherwise read and write the value
+ * through the returned pointer.
+ */
+static void *(*bpf_map_lookup_elem)(void *map, const void *key) =(void *)1;
+static long (*bpf_map_update_elem)(void *map, const void *key,
+                                  const void *value, uint64_t flags) =(void *)2;
+
+/*
+ * Copy @size bytes of data into @dst. The callers in myrocks.h treat a zero
+ * return value as failure.
+ * NOTE(review): presumably consumes the bytes it returns, since the decoders
+ * call it back to back without passing an offset - confirm.
+ */
+static int (*bpf_nvme_read_data)(struct nvme_bpf_ctx * ctx, void *dst,
+                                 uint32_t size) =
+    (void *)BPF_FUNC_nvme_read_data;
+/*
+ * Copy @size bytes at @offset into @dst. The caller in find_xfs.c treats a
+ * non-zero return value as failure.
+ */
+static int (*bpf_nvme_peek_data)(struct nvme_bpf_ctx * ctx, void *dst,
+                                 uint32_t size, uint32_t offset) =
+    (void *)BPF_FUNC_nvme_peek_data;
+/* Skip @size bytes of data; read_block() uses it to step over a value. */
+static void (*bpf_nvme_skip_data)(struct nvme_bpf_ctx * ctx, uint32_t size) =
+    (void *)BPF_FUNC_nvme_skip_data;
+
+/* Test helper without arguments or return value. */
+static void (*bpf_nvme_hello_world)(void) = (void *)BPF_FUNC_nvme_hello_world;
+/*
+ * Timestamp helper; going by its name the value is in nanoseconds.
+ *
+ * Declared with a 64-bit return type: callers keep the result in an unsigned
+ * long and subtract two samples, and a value truncated to 32 bits would wrap
+ * about every 4.3 seconds and turn such a difference into a huge bogus delta.
+ * NOTE(review): assumes the kernel side returns the full 64-bit value in the
+ * return register - confirm against the helper implementation.
+ */
+static uint64_t (*bpf_nvme_ktime_ns)(void) = (void *)BPF_FUNC_nvme_ktime_ns;
diff --git a/bpf-samples/scan.c b/bpf-samples/scan.c
new file mode 100644 (file)
index 0000000..7b1ea62
--- /dev/null
@@ -0,0 +1,63 @@
+
+#include "myrocks.h"
+
+/* Slots of scan_map; MAX_MAPS is the number of slots. */
+enum {
+       NR_KEYS_IDX,    /* running count of decoded records */
+       MAX_MAPS,
+};
+
+/* Holds the record counter that scan() increments, in slot NR_KEYS_IDX. */
+struct nvme_bpf_map_def __section("maps") scan_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(uint64_t),
+       .max_entries    = MAX_MAPS,
+};
+
+/* Single slot accumulating the time spent in the decode loop of scan(). */
+struct nvme_bpf_map_def __section("maps") time_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(uint64_t),
+       .max_entries    = 1,
+};
+
+/* Single slot accumulating the time spent inside the decode helpers. */
+struct nvme_bpf_map_def __section("maps") io_time_map = {
+       .type           = BPF_MAP_TYPE_ARRAY,
+       .key_size       = sizeof(uint32_t),
+       .value_size     = sizeof(uint64_t),
+       .max_entries    = 1,
+};
+
+/*
+ * Program entry point: decode up to MAX_KEYS_PER_PAGE records, counting them
+ * in scan_map and accounting the time spent in time_map (whole loop) and
+ * io_time_map (decode helpers only).
+ *
+ * Returns 1 when one of the map lookups fails or not even the first record
+ * of this invocation could be decoded, 0 otherwise. The loop time is only
+ * recorded on the 0 path.
+ */
+__section("nvme")
+int scan(struct nvme_bpf_ctx *ctx)
+{
+       uint32_t keys_slot = NR_KEYS_IDX;
+       uint32_t total_slot = NR_KEYS_IDX;
+       uint32_t io_slot = NR_KEYS_IDX;
+       unsigned long *nr_keys, *total_time, *io_time;
+       unsigned long t_begin;
+       uint32_t n;
+
+       nr_keys = bpf_map_lookup_elem(&scan_map, &keys_slot);
+       if (!nr_keys)
+               return 1;
+       total_time = bpf_map_lookup_elem(&time_map, &total_slot);
+       if (!total_time)
+               return 1;
+       io_time = bpf_map_lookup_elem(&io_time_map, &io_slot);
+       if (!io_time)
+               return 1;
+
+       t_begin = bpf_nvme_ktime_ns();
+       for (n = 0; n < MAX_KEYS_PER_PAGE; n++) {
+               struct block_data rec;
+
+               if (read_block(ctx, &rec, *nr_keys, io_time)) {
+                       (*nr_keys)++;
+                       continue;
+               }
+
+               /* A failure on the very first record is an error ... */
+               if (!n)
+                       return 1;
+               /* ... later ones just mean the data ran out. */
+               break;
+       }
+       update_io_time(total_time, t_begin);
+       return 0;
+}