From 40f34d6f12e292875b8027ec66038cabb5a317f6 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 26 Sep 2024 13:30:42 +0200 Subject: [PATCH 01/16] bpf: Call kfree(obj) only once in free_one() A kfree() call is always used at the end of this function implementation. Thus specify such a function call only once instead of duplicating it in a previous if branch. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/08987123-668c-40f3-a8ee-c3038d94f069@web.de Signed-off-by: Alexei Starovoitov --- kernel/bpf/memalloc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c index b3858a76e0b3..1a1b4458114c 100644 --- a/kernel/bpf/memalloc.c +++ b/kernel/bpf/memalloc.c @@ -252,11 +252,8 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic) static void free_one(void *obj, bool percpu) { - if (percpu) { + if (percpu) free_percpu(((void __percpu **)obj)[1]); - kfree(obj); - return; - } kfree(obj); } -- 2.51.0 From e8957c0dde3c945db2403beb3131a5ad84860b5f Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:40 -0700 Subject: [PATCH 02/16] libbpf: Improve log message formatting Fix missing newlines and extraneous terminal spaces in messages. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/086884b7cbf87e524d584f9bf87f7a580e378b2b.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 6 +++--- tools/lib/bpf/btf_dump.c | 2 +- tools/lib/bpf/btf_relocate.c | 2 +- tools/lib/bpf/libbpf.c | 4 ++-- tools/lib/bpf/relo_core.c | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c131039c523..e1d4cbd83729 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2941,7 +2941,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc); return -EINVAL; } @@ -3029,7 +3029,7 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) if (data_size < offsetofend(struct btf_ext_header, hdr_len) || data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found"); + pr_debug("BTF.ext header not found\n"); return -EINVAL; } @@ -3291,7 +3291,7 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d)); return libbpf_err(-EINVAL); } diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0a7327541c17..8440c2c5ad3e 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -1304,7 +1304,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warn("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack: %d\n", err); d->decl_stack_cnt = stack_start; return; } diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index 4f7399d85eab..b72f83e15156 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -428,7 +428,7 @@ static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) } else { off = r->str_map[*str_off]; if (!off) { - pr_warn("string '%s' [offset %u] is not mapped to base BTF", + pr_warn("string '%s' [offset %u] is not mapped to base BTF\n", btf__str_by_offset(r->btf, off), *str_off); return -ENOENT; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 1ef221df0541..a7cbb30605da 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -12765,7 +12765,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -13839,7 +13839,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) map_type = btf__type_by_id(btf, map_type_id); if (!btf_is_datasec(map_type)) { - pr_warn("type for map '%1$s' is not a datasec: %2$s", + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", bpf_map__name(map), __btf_kind_str(btf_kind(map_type))); return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 63a4d5ad12d1..7632e9d41827 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1339,7 +1339,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, cands->cands[i].id, cand_spec); if (err < 0) { bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); - pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n", prog_name, relo_idx, i, spec_buf, err); return err; } -- 2.51.0 From 769ad3a61e335ced0a47e300b71b2206963e09a9 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:41 -0700 Subject: [PATCH 03/16] libbpf: Fix header comment typos for BTF.ext Mention struct btf_ext_info_sec rather than non-existent btf_sec_func_info in BTF.ext struct documentation. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/cde65e01a5f2945c578485fab265ef711e2daeb6.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 408df59e0771..8cda511a1982 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -448,11 +448,11 @@ struct btf_ext_info { * * The func_info subsection layout: * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 + * struct btf_ext_info_sec for section #1 * a list of bpf_func_info records for section #1 * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h * but may not be identical - * struct btf_sec_func_info for section #2 + * struct btf_ext_info_sec for section #2 * a list of bpf_func_info records for section #2 * ...... * -- 2.51.0 From f896b4a5399e97af0b451fcf04754ed316935674 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:42 -0700 Subject: [PATCH 04/16] libbpf: Fix output .symtab byte-order during linking Object linking output data uses the default ELF_T_BYTE type for '.symtab' section data, which disables any libelf-based translation. Explicitly set the ELF_T_SYM type for output to restore libelf's byte-order conversion, noting that input '.symtab' data is already correctly translated. Fixes: faf6ed321cf6 ("libbpf: Add BPF static linker APIs") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/87868bfeccf3f51aec61260073f8778e9077050a.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/linker.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index e0005c6ade88..6985ab0f1ca9 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -396,6 +396,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) pr_warn_elf("failed to create SYMTAB data"); return -EINVAL; } + /* Ensure libelf translates byte-order of symbol records */ + sec->data->d_type = ELF_T_SYM; str_off = strset__add_str(linker->strtab_strs, sec->sec_name); if (str_off < 0) -- 2.51.0 From cf579164e9ea9cd41c7c1da931683a319d224890 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:43 -0700 Subject: [PATCH 05/16] libbpf: Support BTF.ext loading and output in either endianness Support for handling BTF data of either endianness was added in [1], but did not include BTF.ext data for lack of use cases. Later, support for static linking [2] provided a use case, but this feature and later ones were restricted to native-endian usage. Add support for BTF.ext handling in either endianness. Convert BTF.ext data to native endianness when read into memory for further processing, and support raw data access that restores the original byte-order for output. Add internal header functions for byte-swapping func, line, and core info records. Add new API functions btf_ext__endianness() and btf_ext__set_endianness() for query and setting byte-order, as already exist for BTF data. [1] 3289959b97ca ("libbpf: Support BTF loading and raw data output in both endianness") [2] 8fd27bf69b86 ("libbpf: Add BPF static linker BTF and BTF.ext support") Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/133407ab20e0dd5c07cab2a6fa7879dee1ffa4bc.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 276 +++++++++++++++++++++++++------- tools/lib/bpf/btf.h | 3 + tools/lib/bpf/libbpf.map | 2 + tools/lib/bpf/libbpf_internal.h | 29 ++++ 4 files changed, 253 insertions(+), 57 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index e1d4cbd83729..e9673c0ecbe7 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -2885,7 +2885,7 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return btf_commit_type(btf, sz); } -struct btf_ext_sec_setup_param { +struct btf_ext_sec_info_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -2893,14 +2893,20 @@ struct btf_ext_sec_setup_param { const char *desc; }; -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) +/* + * Parse a single info subsection of the BTF.ext info data: + * - validate subsection structure and elements + * - save info subsection start and sizing details in struct btf_ext + * - endian-independent operation, for calling before byte-swapping + */ +static int btf_ext_parse_sec_info(struct btf_ext *btf_ext, + struct btf_ext_sec_info_param *ext_sec, + bool is_native) { const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; size_t sec_cnt = 0; - /* The start of the info sec (including the __u32 record_size). */ void *info; if (ext_sec->len == 0) @@ -2912,6 +2918,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } + /* The start of the info sec (including the __u32 record_size). */ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; info_left = ext_sec->len; @@ -2927,9 +2934,13 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; + /* The record size needs to meet either the minimum standard or, when + * handling non-native endianness data, the exact standard so as + * to allow safe byte-swapping. + */ + record_size = is_native ? *(__u32 *)info : bswap_32(*(__u32 *)info); if (record_size < ext_sec->min_rec_size || + (!is_native && record_size != ext_sec->min_rec_size) || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); @@ -2956,7 +2967,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - num_records = sinfo->num_info; + num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info); if (num_records == 0) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2984,64 +2995,157 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return 0; } -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +/* Parse all info secs in the BTF.ext info data */ +static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) { - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param func_info = { .off = btf_ext->hdr->func_info_off, .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param line_info = { .off = btf_ext->hdr->line_info_off, .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param core_relo = { .off = btf_ext->hdr->core_relo_off, .len = btf_ext->hdr->core_relo_len, .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", }; + int err; + + err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native); + if (err) + return err; + + err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native); + if (err) + return err; + + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return 0; /* skip core relos parsing */ - return btf_ext_setup_info(btf_ext, ¶m); + err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); + if (err) + return err; + + return 0; } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +/* Swap byte-order of BTF.ext header with any endianness */ +static void btf_ext_bswap_hdr(struct btf_ext_header *h) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + bool is_native = h->magic == BTF_MAGIC; + __u32 hdr_len; - if (data_size < offsetofend(struct btf_ext_header, hdr_len) || - data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found\n"); + hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len); + + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->func_info_off = bswap_32(h->func_info_off); + h->func_info_len = bswap_32(h->func_info_len); + h->line_info_off = bswap_32(h->line_info_off); + h->line_info_len = bswap_32(h->line_info_len); + + if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + h->core_relo_off = bswap_32(h->core_relo_off); + h->core_relo_len = bswap_32(h->core_relo_len); +} + +/* Swap byte-order of generic info subsection */ +static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native, + info_rec_bswap_fn bswap_fn) +{ + struct btf_ext_info_sec *sec; + __u32 info_left, rec_size, *rs; + + if (len == 0) + return; + + rs = info; /* info record size */ + rec_size = is_native ? *rs : bswap_32(*rs); + *rs = bswap_32(*rs); + + sec = info + sizeof(__u32); /* info sec #1 */ + info_left = len - sizeof(__u32); + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, num_recs; + void *p; + + num_recs = is_native ? sec->num_info : bswap_32(sec->num_info); + sec->sec_name_off = bswap_32(sec->sec_name_off); + sec->num_info = bswap_32(sec->num_info); + p = sec->data; /* info rec #1 */ + for (i = 0; i < num_recs; i++, p += rec_size) + bswap_fn(p); + sec = p; + info_left -= sec_hdrlen + (__u64)rec_size * num_recs; + } +} + +/* + * Swap byte-order of all info data in a BTF.ext section + * - requires BTF.ext hdr in native endianness + */ +static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data) +{ + const bool is_native = btf_ext->swapped_endian; + const struct btf_ext_header *h = data; + void *info; + + /* Swap func_info subsection byte-order */ + info = data + h->hdr_len + h->func_info_off; + btf_ext_bswap_info_sec(info, h->func_info_len, is_native, + (info_rec_bswap_fn)bpf_func_info_bswap); + + /* Swap line_info subsection byte-order */ + info = data + h->hdr_len + h->line_info_off; + btf_ext_bswap_info_sec(info, h->line_info_len, is_native, + (info_rec_bswap_fn)bpf_line_info_bswap); + + /* Swap core_relo subsection byte-order (if present) */ + if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + info = data + h->hdr_len + h->core_relo_off; + btf_ext_bswap_info_sec(info, h->core_relo_len, is_native, + (info_rec_bswap_fn)bpf_core_relo_bswap); +} + +/* Parse hdr data and info sections: check and convert to native endianness */ +static int btf_ext_parse(struct btf_ext *btf_ext) +{ + __u32 hdr_len, data_size = btf_ext->data_size; + struct btf_ext_header *hdr = btf_ext->hdr; + bool swapped_endian = false; + int err; + + if (data_size < offsetofend(struct btf_ext_header, hdr_len)) { + pr_debug("BTF.ext header too short\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; if (hdr->magic == bswap_16(BTF_MAGIC)) { - pr_warn("BTF.ext in non-native endianness is not supported\n"); - return -ENOTSUP; + swapped_endian = true; + hdr_len = bswap_32(hdr_len); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { + /* Ensure known version of structs, current BTF_VERSION == 1 */ + if (hdr->version != 1) { pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } @@ -3051,11 +3155,39 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -ENOTSUP; } - if (data_size == hdr->hdr_len) { + if (data_size < hdr_len) { + pr_debug("BTF.ext header not found\n"); + return -EINVAL; + } else if (data_size == hdr_len) { pr_debug("BTF.ext has no data\n"); return -EINVAL; } + /* Verify mandatory hdr info details present */ + if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + pr_warn("BTF.ext header missing func_info, line_info\n"); + return -EINVAL; + } + + /* Keep hdr native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_hdr(btf_ext->hdr); + + /* Validate info subsections and cache key metadata */ + err = btf_ext_parse_info(btf_ext, !swapped_endian); + if (err) + return err; + + /* Keep infos native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_info(btf_ext, btf_ext->data); + + /* + * Set btf_ext->swapped_endian only after all header and info data has + * been swapped, helping bswap functions determine if their data are + * in native byte-order when called. + */ + btf_ext->swapped_endian = swapped_endian; return 0; } @@ -3067,6 +3199,7 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext->line_info.sec_idxs); free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); + free(btf_ext->data_swapped); free(btf_ext); } @@ -3087,29 +3220,7 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - err = btf_ext_parse_hdr(btf_ext->data, size); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_func_info(btf_ext); - if (err) - goto done; - - err = btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) - goto done; /* skip core relos parsing */ - - err = btf_ext_setup_core_relos(btf_ext); - if (err) - goto done; + err = btf_ext_parse(btf_ext); done: if (err) { @@ -3120,15 +3231,66 @@ done: return btf_ext; } +static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian) +{ + struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro; + const __u32 data_sz = btf_ext->data_size; + void *data; + + /* Return native data (always present) or swapped data if present */ + if (!swap_endian) + return btf_ext->data; + else if (btf_ext->data_swapped) + return btf_ext->data_swapped; + + /* Recreate missing swapped data, then cache and return */ + data = calloc(1, data_sz); + if (!data) + return NULL; + memcpy(data, btf_ext->data, data_sz); + + btf_ext_bswap_info(btf_ext, data); + btf_ext_bswap_hdr(data); + btf_ext->data_swapped = data; + return data; +} + const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { + void *data; + + data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian); + if (!data) + return errno = ENOMEM, NULL; + *size = btf_ext->data_size; - return btf_ext->data; + return data; } __attribute__((alias("btf_ext__raw_data"))) const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); +enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext) +{ + if (is_host_big_endian()) + return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian) +{ + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return libbpf_err(-EINVAL); + + btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + + if (!btf_ext->swapped_endian) { + free(btf_ext->data_swapped); + btf_ext->data_swapped = NULL; + } + return 0; +} struct btf_dedup; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4e349ad79ee6..47ee8f6ac489 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -167,6 +167,9 @@ LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); +LIBBPF_API enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__set_endianness(struct btf_ext *btf_ext, + enum btf_endianness endian); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0096e483f7eb..f40ccc2946e7 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -421,6 +421,8 @@ LIBBPF_1.5.0 { global: btf__distill_base; btf__relocate; + btf_ext__endianness; + btf_ext__set_endianness; bpf_map__autoattach; bpf_map__set_autoattach; bpf_object__token_fd; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 8cda511a1982..182529360c84 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include +#include #include #include #include @@ -484,6 +485,8 @@ struct btf_ext { struct btf_ext_header *hdr; void *data; }; + void *data_swapped; + bool swapped_endian; struct btf_ext_info func_info; struct btf_ext_info line_info; struct btf_ext_info core_relo_info; @@ -511,6 +514,32 @@ struct bpf_line_info_min { __u32 line_col; }; +/* Functions to byte-swap info records */ + +typedef void (*info_rec_bswap_fn)(void *); + +static inline void bpf_func_info_bswap(struct bpf_func_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); +} + +static inline void bpf_line_info_bswap(struct bpf_line_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->file_name_off = bswap_32(i->file_name_off); + i->line_off = bswap_32(i->line_off); + i->line_col = bswap_32(i->line_col); +} + +static inline void bpf_core_relo_bswap(struct bpf_core_relo *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); + i->access_str_off = bswap_32(i->access_str_off); + i->kind = bswap_32(i->kind); +} + enum btf_field_iter_kind { BTF_FIELD_ITER_IDS, BTF_FIELD_ITER_STRS, -- 2.51.0 From bcc60abd67c7741a065480df4875534f62f758b1 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:44 -0700 Subject: [PATCH 06/16] libbpf: Support opening bpf objects of either endianness Allow bpf_object__open() to access files of either endianness, and convert included BPF programs to native byte-order in-memory for introspection. Loading BPF objects of non-native byte-order is still disallowed however. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/26353c1a1887a54400e1acd6c138fa90c99cdd40.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 51 +++++++++++++++++++++++++++------ tools/lib/bpf/libbpf_internal.h | 10 +++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a7cbb30605da..041228674bfa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -694,6 +694,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; + unsigned char byteorder; + struct btf *btf; struct btf_ext *btf_ext; @@ -940,6 +942,20 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } +static void bpf_object_bswap_progs(struct bpf_object *obj) +{ + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; + + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); +} + static const struct btf_member * find_member_by_offset(const struct btf_type *t, __u32 bit_offset) { @@ -1506,6 +1522,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; obj->efile.arena_data = NULL; @@ -1571,6 +1588,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1599,19 +1626,15 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int @@ -3954,6 +3977,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset * for faster search */ @@ -7997,7 +8024,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object_fixup_btf(obj); @@ -8503,8 +8529,15 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-EINVAL); } - if (obj->gen_loader) + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } err = bpf_object_prepare_token(obj); err = err ? : bpf_object__probe_loading(obj); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 182529360c84..de498e2dd6b0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -617,6 +617,16 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + /* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. * Original FD is not closed or altered in any other way. * Preserves original FD value, if it's invalid (negative). -- 2.51.0 From 0aed726cf6f97bfe11de693781504787857894b7 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:45 -0700 Subject: [PATCH 07/16] libbpf: Support linking bpf objects of either endianness Allow static linking object files of either endianness, checking that input files have consistent byte-order, and setting output endianness from input. Linking requires in-memory processing of programs, relocations, sections, etc. in native endianness, and output conversion to target byte-order. This is enabled by built-in ELF translation and recent BTF/BTF.ext endianness functions. Further add local functions for swapping byte-order of sections containing BPF insns. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b47ca686d02664843fc99b96262fe3259650bc43.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/linker.c | 78 +++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 6985ab0f1ca9..81dbbdd79a7c 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -135,6 +135,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -324,13 +325,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -541,19 +537,21 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif pr_debug("linker: adding object file '%s'...\n", filename); @@ -579,11 +577,25 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, pr_warn_elf("failed to get ELF header for %s", filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { + err = -EOPNOTSUPP; + pr_warn("unknown byte order of ELF file %s\n", filename); + return err; + } + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? "big" : "little"); + } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("byte order mismatch with ELF file %s\n", filename); return err; } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { @@ -1111,6 +1123,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1170,6 +1200,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -2630,6 +2664,10 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } @@ -2698,6 +2736,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2742,6 +2781,13 @@ static int finalize_btf(struct bpf_linker *linker) return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) -- 2.51.0 From 8ca3323dce43792b6a7bbbc3314fc2a281d3af92 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:46 -0700 Subject: [PATCH 08/16] libbpf: Support creating light skeleton of either endianness Track target endianness in 'struct bpf_gen' and process in-memory data in native byte-order, but on finalization convert the embedded loader BPF insns to target endianness. The light skeleton also includes a target-accessed data blob which is heterogeneous and thus difficult to convert to target byte-order on finalization. Add support functions to convert data to target endianness as it is added to the blob. Also add additional debug logging for data blob structure details and skeleton loading. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/569562e1d5bf1cce80a1f1a3882461ee2da1ffd5.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_gen_internal.h | 1 + tools/lib/bpf/gen_loader.c | 187 ++++++++++++++++++++++--------- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/skel_internal.h | 3 +- 4 files changed, 139 insertions(+), 53 deletions(-) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index fdf44403ff36..6ff963a491d9 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -34,6 +34,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cf3323fd47b8..8a2b0f62d91d 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -401,6 +401,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +423,28 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +453,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +489,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +531,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -784,12 +817,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -840,7 +873,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -931,48 +964,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1040,7 +1119,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); @@ -1068,6 +1146,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1084,14 +1164,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1112,8 +1194,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 041228674bfa..712b95e8891b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9134,6 +9134,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) if (!gen) return -ENOMEM; gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0875452521e9..4d5fa079b5d6 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -351,10 +351,11 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.test.ctx_size_in = opts->ctx->sz; err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { + opts->errstr = "failed to execute loader prog"; set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; #ifndef __KERNEL__ errno = -err; -- 2.51.0 From 5a63c33d6f00e1739944cea2f445819951610c7d Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:47 -0700 Subject: [PATCH 09/16] selftests/bpf: Support cross-endian building Update Makefile build rules to compile BPF programs with target endianness rather than host byte-order. With recent changes, this allows building the full selftests/bpf suite hosted on x86_64 and targeting s390x or mips64eb for example. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/880ccc6342cfc4d3c48b44f581e87adfbce2876e.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 365740f24d2e..e295e3df5ec6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -445,6 +445,7 @@ endef IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - Date: Thu, 26 Sep 2024 15:49:48 +0100 Subject: [PATCH 10/16] selftests/bpf: Fix uprobe_multi compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When building selftests, the following was seen: uprobe_multi.c: In function ‘trigger_uprobe’: uprobe_multi.c:108:40: error: ‘MADV_PAGEOUT’ undeclared (first use in this function) 108 | madvise(addr, page_sz, MADV_PAGEOUT); | ^~~~~~~~~~~~ uprobe_multi.c:108:40: note: each undeclared identifier is reported only once for each function it appears in make: *** [Makefile:850: bpf-next/tools/testing/selftests/bpf/uprobe_multi] Error 1 ...even with updated UAPI headers. It seems the above value is defined in UAPI but including that file triggers other redefinition errors. Simplest solution is to add a guarded definition, as was done for MADV_POPULATE_READ. Fixes: 3c217a182018 ("selftests/bpf: add build ID tests") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240926144948.172090-1-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/uprobe_multi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index c7828b13e5ff..dd38dc68f635 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -12,6 +12,10 @@ #define MADV_POPULATE_READ 22 #endif +#ifndef MADV_PAGEOUT +#define MADV_PAGEOUT 21 +#endif + int __attribute__((weak)) uprobe(void) { return 0; -- 2.51.0 From 48b13cab1e7cb77def27cb89711fb5e3b04db972 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:09 -0700 Subject: [PATCH 11/16] bpf: Allow specifying bpf_fastcall attribute for BPF helpers Allow a new optional 'Attributes' section to be specified for helper functions description, e.g.: * u32 bpf_get_smp_processor_id(void) * ... * Return * ... * Attributes * __bpf_fastcall * Generated header for the example above: #ifndef __bpf_fastcall #if __has_attribute(__bpf_fastcall) #define __bpf_fastcall __attribute__((bpf_fastcall)) #else #define __bpf_fastcall #endif #endif ... __bpf_fastcall static __u32 (* const bpf_get_smp_processor_id)(void) = (void *) 8; The following rules apply: - when present, section must follow 'Return' section; - attribute names are specified on the line following 'Attribute' keyword; - attribute names are separated by spaces; - section ends with an "empty" line (" *\n"). Valid attribute names are recorded in the ATTRS map. ATTRS maps shortcut attribute name to correct C syntax. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- scripts/bpf_doc.py | 53 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c55878bddfdd..e74a01a85070 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -37,10 +37,11 @@ class APIElement(object): @desc: textual description of the symbol @ret: (optional) description of any associated return value """ - def __init__(self, proto='', desc='', ret=''): + def __init__(self, proto='', desc='', ret='', attrs=[]): self.proto = proto self.desc = desc self.ret = ret + self.attrs = attrs class Helper(APIElement): @@ -81,6 +82,11 @@ class Helper(APIElement): return res +ATTRS = { + '__bpf_fastcall': 'bpf_fastcall' +} + + class HeaderParser(object): """ An object used to parse a file in order to extract the documentation of a @@ -111,7 +117,8 @@ class HeaderParser(object): proto = self.parse_proto() desc = self.parse_desc(proto) ret = self.parse_ret(proto) - return Helper(proto=proto, desc=desc, ret=ret) + attrs = self.parse_attrs(proto) + return Helper(proto=proto, desc=desc, ret=ret, attrs=attrs) def parse_symbol(self): p = re.compile(r' \* ?(BPF\w+)$') @@ -192,6 +199,28 @@ class HeaderParser(object): raise Exception("No return found for " + proto) return ret + def parse_attrs(self, proto): + p = re.compile(r' \* ?(?:\t| {5,8})Attributes$') + capture = p.match(self.line) + if not capture: + return [] + # Expect a single line with mnemonics for attributes separated by spaces + self.line = self.reader.readline() + p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') + capture = p.match(self.line) + if not capture: + raise Exception("Incomplete 'Attributes' section for " + proto) + attrs = capture.group(1).split(' ') + for attr in attrs: + if attr not in ATTRS: + raise Exception("Unexpected attribute '" + attr + "' specified for " + proto) + self.line = self.reader.readline() + if self.line != ' *\n': + raise Exception("Expecting empty line after 'Attributes' section for " + proto) + # Prepare a line for next self.parse_* to consume + self.line = self.reader.readline() + return attrs + def seek_to(self, target, help_message, discard_lines = 1): self.reader.seek(0) offset = self.reader.read().find(target) @@ -789,6 +818,21 @@ class PrinterHelpers(Printer): print('%s;' % fwd) print('') + used_attrs = set() + for helper in self.elements: + for attr in helper.attrs: + used_attrs.add(attr) + for attr in sorted(used_attrs): + print('#ifndef %s' % attr) + print('#if __has_attribute(%s)' % ATTRS[attr]) + print('#define %s __attribute__((%s))' % (attr, ATTRS[attr])) + print('#else') + print('#define %s' % attr) + print('#endif') + print('#endif') + if used_attrs: + print('') + def print_footer(self): footer = '' print(footer) @@ -827,7 +871,10 @@ class PrinterHelpers(Printer): print(' *{}{}'.format(' \t' if line else '', line)) print(' */') - print('static %s %s(* const %s)(' % (self.map_type(proto['ret_type']), + print('static ', end='') + if helper.attrs: + print('%s ' % (" ".join(helper.attrs)), end='') + print('%s %s(* const %s)(' % (self.map_type(proto['ret_type']), proto['ret_star'], proto['name']), end='') comma = '' for i, a in enumerate(proto['args']): -- 2.51.0 From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:10 -0700 Subject: [PATCH 12/16] bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi Since [1] kernel supports __bpf_fastcall attribute for helper function bpf_get_smp_processor_id(). Update uapi definition for this helper in order to have this attribute in the generated bpf_helper_defs.h [1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ tools/include/uapi/linux/bpf.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..8ab4d8184b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..7610883c8191 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description -- 2.51.0 From da7d71bcb0637b7aa18934628fdb5a55f2db49a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:11 -0700 Subject: [PATCH 13/16] bpf: Use KF_FASTCALL to mark kfuncs supporting fastcall contract In order to allow pahole add btf_decl_tag("bpf_fastcall") for kfuncs supporting bpf_fastcall, mark such functions with KF_FASTCALL in id_set8 objects. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index b8a583194c4a..631060e3ad14 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -75,6 +75,7 @@ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ +#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1a43d06eab28..4053f279ed4c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3052,8 +3052,8 @@ BTF_ID(func, bpf_cgroup_release_dtor) #endif BTF_KFUNCS_START(common_btf_ids) -BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) -BTF_ID_FLAGS(func, bpf_rdonly_cast) +BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL) +BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9a7ed527e47e..7d9b38ffd220 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16176,10 +16176,7 @@ static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) /* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) { - if (meta->btf == btf_vmlinux) - return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || - meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]; - return false; + return meta->kfunc_flags & KF_FASTCALL; } /* LLVM define a bpf_fastcall function attribute. -- 2.51.0 From bf7ce5416f68db058ac7105902adf497b3ce4e8c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:12 -0700 Subject: [PATCH 14/16] bpftool: __bpf_fastcall for kfuncs marked with special decl_tag Generate __attribute__((bpf_fastcall)) for kfuncs marked with "bpf_fastcall" decl tag. E.g. for the following BTF: $ bpftool btf dump file vmlinux ... [A] FUNC 'bpf_rdonly_cast' type_id=... ... [B] DECL_TAG 'bpf_kfunc' type_id=A component_idx=-1 [C] DECL_TAG 'bpf_fastcall' type_id=A component_idx=-1 Generate the following vmlinux.h: #ifndef __VMLINUX_H__ #define __VMLINUX_H__ ... #ifndef __bpf_fastcall #if __has_attribute(bpf_fastcall) #define __bpf_fastcall __attribute__((bpf_fastcall)) #else #define __bpf_fastcall #endif #endif ... __bpf_fastcall extern void *bpf_rdonly_cast(...) ...; The "bpf_fastcall" / "bpf_kfunc" tags pair would generated by pahole when constructing vmlinux BTF. While at it, sort printed kfuncs by name for better vmlinux.h stability. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf.c | 98 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7d2af1ff3c8d..547c1ccdce98 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include #include #include #include #include +#include #include #include #include @@ -21,6 +25,7 @@ #include "main.h" #define KFUNC_DECL_TAG "bpf_kfunc" +#define FASTCALL_DECL_TAG "bpf_fastcall" static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", @@ -464,19 +469,59 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +struct ptr_array { + __u32 cnt; + __u32 cap; + const void **elems; +}; + +static int ptr_array_push(const void *ptr, struct ptr_array *arr) +{ + __u32 new_cap; + void *tmp; + + if (arr->cnt == arr->cap) { + new_cap = (arr->cap ?: 16) * 2; + tmp = realloc(arr->elems, sizeof(*arr->elems) * new_cap); + if (!tmp) + return -ENOMEM; + arr->elems = tmp; + arr->cap = new_cap; + } + arr->elems[arr->cnt++] = ptr; + return 0; +} + +static void ptr_array_free(struct ptr_array *arr) +{ + free(arr->elems); +} + +static int cmp_kfuncs(const void *pa, const void *pb, void *ctx) +{ + struct btf *btf = ctx; + const struct btf_type *a = *(void **)pa; + const struct btf_type *b = *(void **)pb; + + return strcmp(btf__str_by_offset(btf, a->name_off), + btf__str_by_offset(btf, b->name_off)); +} + static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) { LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); - int cnt = btf__type_cnt(btf); - int i; + __u32 cnt = btf__type_cnt(btf), i, j; + struct ptr_array fastcalls = {}; + struct ptr_array kfuncs = {}; + int err = 0; printf("\n/* BPF kfuncs */\n"); printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); for (i = 1; i < cnt; i++) { const struct btf_type *t = btf__type_by_id(btf, i); + const struct btf_type *ft; const char *name; - int err; if (!btf_is_decl_tag(t)) continue; @@ -484,27 +529,53 @@ static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) if (btf_decl_tag(t)->component_idx != -1) continue; - name = btf__name_by_offset(btf, t->name_off); - if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + ft = btf__type_by_id(btf, t->type); + if (!btf_is_func(ft)) continue; - t = btf__type_by_id(btf, t->type); - if (!btf_is_func(t)) - continue; + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &kfuncs); + if (err) + goto out; + } + + if (strncmp(name, FASTCALL_DECL_TAG, sizeof(FASTCALL_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &fastcalls); + if (err) + goto out; + } + } + + /* Sort kfuncs by name for improved vmlinux.h stability */ + qsort_r(kfuncs.elems, kfuncs.cnt, sizeof(*kfuncs.elems), cmp_kfuncs, (void *)btf); + for (i = 0; i < kfuncs.cnt; i++) { + const struct btf_type *t = kfuncs.elems[i]; printf("extern "); + /* Assume small amount of fastcall kfuncs */ + for (j = 0; j < fastcalls.cnt; j++) { + if (fastcalls.elems[j] == t) { + printf("__bpf_fastcall "); + break; + } + } + opts.field_name = btf__name_by_offset(btf, t->name_off); err = btf_dump__emit_type_decl(d, t->type, &opts); if (err) - return err; + goto out; printf(" __weak __ksym;\n"); } printf("#endif\n\n"); - return 0; +out: + ptr_array_free(&fastcalls); + ptr_array_free(&kfuncs); + return err; } static void __printf(2, 0) btf_dump_printf(void *ctx, @@ -718,6 +789,13 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef __weak\n"); printf("#define __weak __attribute__((weak))\n"); printf("#endif\n\n"); + printf("#ifndef __bpf_fastcall\n"); + printf("#if __has_attribute(bpf_fastcall)\n"); + printf("#define __bpf_fastcall __attribute__((bpf_fastcall))\n"); + printf("#else\n"); + printf("#define __bpf_fastcall\n"); + printf("#endif\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { -- 2.51.0 From a5da3d65681f86f582420b5aea49c1d9a7c7e51e Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 1 Oct 2024 00:15:22 +0100 Subject: [PATCH 15/16] selftests/bpf: Emit top frequent code lines in veristat Production BPF programs are increasing in number of instructions and states to the point, where optimising verification process for them is necessary to avoid running into instruction limit. Authors of those BPF programs need to analyze verifier output, for example, collecting the most frequent source code lines to understand which part of the program has the biggest verification cost. This patch introduces `--top-src-lines` flag in veristat. `--top-src-lines=N` makes veristat output N the most popular sorce code lines, parsed from verification log. An example of output: ``` sudo ./veristat --top-src-lines=2 bpf_flow.bpf.o Processing 'bpf_flow.bpf.o'... Top source lines (_dissect): 4: (bpf_helpers.h:161) asm volatile("r1 = %[ctx]\n\t" 4: (bpf_flow.c:155) if (iph && iph->ihl == 5 && ... ``` Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240930231522.58650-1-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 129 ++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1ec5c4c47235..c8efd44590d9 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -179,6 +179,7 @@ static struct env { int files_skipped; int progs_processed; int progs_skipped; + int top_src_lines; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -228,6 +229,7 @@ static const struct argp_option opts[] = { "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, + { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, {}, }; @@ -327,6 +329,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case 'S': + errno = 0; + env.top_src_lines = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top lines N specifier: %s\n", arg); + argp_usage(state); + } + break; case ARGP_KEY_ARG: tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); if (!tmp) @@ -854,6 +864,118 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +struct line_cnt { + char *line; + int cnt; +}; + +static int str_cmp(const void *a, const void *b) +{ + const char **str1 = (const char **)a; + const char **str2 = (const char **)b; + + return strcmp(*str1, *str2); +} + +static int line_cnt_cmp(const void *a, const void *b) +{ + const struct line_cnt *a_cnt = (const struct line_cnt *)a; + const struct line_cnt *b_cnt = (const struct line_cnt *)b; + + if (a_cnt->cnt != b_cnt->cnt) + return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return strcmp(a_cnt->line, b_cnt->line); +} + +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name) +{ + int lines_cap = 0; + int lines_size = 0; + char **lines = NULL; + char *line = NULL; + char *state; + struct line_cnt *freq = NULL; + struct line_cnt *cur; + int unique_lines; + int err = 0; + int i; + + while ((line = strtok_r(line ? NULL : buf, "\n", &state))) { + if (strncmp(line, "; ", 2) != 0) + continue; + line += 2; + + if (lines_size == lines_cap) { + char **tmp; + + lines_cap = max(16, lines_cap * 2); + tmp = realloc(lines, lines_cap * sizeof(*tmp)); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + lines = tmp; + } + lines[lines_size] = line; + lines_size++; + } + + if (lines_size == 0) + goto cleanup; + + qsort(lines, lines_size, sizeof(*lines), str_cmp); + + freq = calloc(lines_size, sizeof(*freq)); + if (!freq) { + err = -ENOMEM; + goto cleanup; + } + + cur = freq; + cur->line = lines[0]; + cur->cnt = 1; + for (i = 1; i < lines_size; ++i) { + if (strcmp(lines[i], cur->line) != 0) { + cur++; + cur->line = lines[i]; + cur->cnt = 0; + } + cur->cnt++; + } + unique_lines = cur - freq + 1; + + qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp); + + printf("Top source lines (%s):\n", prog_name); + for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) { + const char *src_code = freq[i].line; + const char *src_line = NULL; + char *split = strrchr(freq[i].line, '@'); + + if (split) { + src_line = split + 1; + + while (*src_line && isspace(*src_line)) + src_line++; + + while (split > src_code && isspace(*split)) + split--; + *split = '\0'; + } + + if (src_line) + printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code); + else + printf("%5d: %s\n", freq[i].cnt, src_code); + } + printf("\n"); + +cleanup: + free(freq); + free(lines); + return err; +} + static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_prog_type *prog_type, enum bpf_attach_type *attach_type) @@ -1009,13 +1131,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats = &env.prog_stats[env.prog_stat_cnt++]; memset(stats, 0, sizeof(*stats)); - if (env.verbose) { + if (env.verbose || env.top_src_lines > 0) { buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; buf = malloc(buf_sz); if (!buf) return -ENOMEM; /* ensure we always request stats */ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); + /* --top-src-lines needs verifier log */ + if (env.top_src_lines > 0 && env.log_level == 0) + log_level |= 2; } else { buf = verif_log_buf; buf_sz = sizeof(verif_log_buf); @@ -1048,6 +1173,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf filename, prog_name, stats->stats[DURATION], err ? "failure" : "success", buf); } + if (env.top_src_lines > 0) + print_top_src_lines(buf, buf_sz, stats->prog_name); if (verif_log_buf != buf) free(buf); -- 2.51.0 From 710fbca820c721cdd60fa8c5bbe9deb4c0788aae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 27 Sep 2024 15:13:52 +0200 Subject: [PATCH 16/16] libbpf: Add missing per-arch include path MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit libbpf does not include the per-arch tools include path, e.g. tools/arch/riscv/include. Some architectures depend those files to build properly. Include tools/arch/$(SUBARCH)/include in the libbpf build. Fixes: 6d74d178fe6e ("tools: Add riscv barrier implementation") Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240927131355.350918-1-bjorn@kernel.org --- tools/lib/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b22f0f37288..857a5f7b413d 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -61,7 +61,8 @@ ifndef VERBOSE endif INCLUDES = -I$(or $(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj -- 2.51.0