From bcc60abd67c7741a065480df4875534f62f758b1 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:44 -0700 Subject: [PATCH 01/16] libbpf: Support opening bpf objects of either endianness Allow bpf_object__open() to access files of either endianness, and convert included BPF programs to native byte-order in-memory for introspection. Loading BPF objects of non-native byte-order is still disallowed however. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/26353c1a1887a54400e1acd6c138fa90c99cdd40.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf.c | 51 +++++++++++++++++++++++++++------ tools/lib/bpf/libbpf_internal.h | 10 +++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a7cbb30605da..041228674bfa 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -694,6 +694,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; + unsigned char byteorder; + struct btf *btf; struct btf_ext *btf_ext; @@ -940,6 +942,20 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } +static void bpf_object_bswap_progs(struct bpf_object *obj) +{ + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; + + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); +} + static const struct btf_member * find_member_by_offset(const struct btf_type *t, __u32 bit_offset) { @@ -1506,6 +1522,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; obj->efile.arena_data = NULL; @@ -1571,6 +1588,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1599,19 +1626,15 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int @@ -3954,6 +3977,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset * for faster search */ @@ -7997,7 +8024,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object_fixup_btf(obj); @@ -8503,8 +8529,15 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-EINVAL); } - if (obj->gen_loader) + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } err = bpf_object_prepare_token(obj); err = err ? : bpf_object__probe_loading(obj); diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 182529360c84..de498e2dd6b0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -617,6 +617,16 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + /* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. * Original FD is not closed or altered in any other way. * Preserves original FD value, if it's invalid (negative). -- 2.51.0 From 0aed726cf6f97bfe11de693781504787857894b7 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:45 -0700 Subject: [PATCH 02/16] libbpf: Support linking bpf objects of either endianness Allow static linking object files of either endianness, checking that input files have consistent byte-order, and setting output endianness from input. Linking requires in-memory processing of programs, relocations, sections, etc. in native endianness, and output conversion to target byte-order. This is enabled by built-in ELF translation and recent BTF/BTF.ext endianness functions. Further add local functions for swapping byte-order of sections containing BPF insns. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/b47ca686d02664843fc99b96262fe3259650bc43.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/linker.c | 78 +++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 16 deletions(-) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 6985ab0f1ca9..81dbbdd79a7c 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -135,6 +135,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -324,13 +325,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -541,19 +537,21 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif pr_debug("linker: adding object file '%s'...\n", filename); @@ -579,11 +577,25 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, pr_warn_elf("failed to get ELF header for %s", filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { + err = -EOPNOTSUPP; + pr_warn("unknown byte order of ELF file %s\n", filename); + return err; + } + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? "big" : "little"); + } else if (link_byteorder != obj_byteorder) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("byte order mismatch with ELF file %s\n", filename); return err; } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { @@ -1111,6 +1123,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1170,6 +1200,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -2630,6 +2664,10 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } @@ -2698,6 +2736,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2742,6 +2781,13 @@ static int finalize_btf(struct bpf_linker *linker) return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) -- 2.51.0 From 8ca3323dce43792b6a7bbbc3314fc2a281d3af92 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:46 -0700 Subject: [PATCH 03/16] libbpf: Support creating light skeleton of either endianness Track target endianness in 'struct bpf_gen' and process in-memory data in native byte-order, but on finalization convert the embedded loader BPF insns to target endianness. The light skeleton also includes a target-accessed data blob which is heterogeneous and thus difficult to convert to target byte-order on finalization. Add support functions to convert data to target endianness as it is added to the blob. Also add additional debug logging for data blob structure details and skeleton loading. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/569562e1d5bf1cce80a1f1a3882461ee2da1ffd5.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/bpf_gen_internal.h | 1 + tools/lib/bpf/gen_loader.c | 187 ++++++++++++++++++++++--------- tools/lib/bpf/libbpf.c | 1 + tools/lib/bpf/skel_internal.h | 3 +- 4 files changed, 139 insertions(+), 53 deletions(-) diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index fdf44403ff36..6ff963a491d9 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -34,6 +34,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cf3323fd47b8..8a2b0f62d91d 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -401,6 +401,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +423,28 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +453,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +489,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +531,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -784,12 +817,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -840,7 +873,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -931,48 +964,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1040,7 +1119,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); @@ -1068,6 +1146,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1084,14 +1164,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1112,8 +1194,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 041228674bfa..712b95e8891b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -9134,6 +9134,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) if (!gen) return -ENOMEM; gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0875452521e9..4d5fa079b5d6 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -351,10 +351,11 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.test.ctx_size_in = opts->ctx->sz; err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { + opts->errstr = "failed to execute loader prog"; set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; #ifndef __KERNEL__ errno = -err; -- 2.51.0 From 5a63c33d6f00e1739944cea2f445819951610c7d Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 16 Sep 2024 01:37:47 -0700 Subject: [PATCH 04/16] selftests/bpf: Support cross-endian building Update Makefile build rules to compile BPF programs with target endianness rather than host byte-order. With recent changes, this allows building the full selftests/bpf suite hosted on x86_64 and targeting s390x or mips64eb for example. Signed-off-by: Tony Ambardar Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/880ccc6342cfc4d3c48b44f581e87adfbce2876e.1726475448.git.tony.ambardar@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 365740f24d2e..e295e3df5ec6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -445,6 +445,7 @@ endef IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - Date: Thu, 26 Sep 2024 15:49:48 +0100 Subject: [PATCH 05/16] selftests/bpf: Fix uprobe_multi compilation error MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit When building selftests, the following was seen: uprobe_multi.c: In function ‘trigger_uprobe’: uprobe_multi.c:108:40: error: ‘MADV_PAGEOUT’ undeclared (first use in this function) 108 | madvise(addr, page_sz, MADV_PAGEOUT); | ^~~~~~~~~~~~ uprobe_multi.c:108:40: note: each undeclared identifier is reported only once for each function it appears in make: *** [Makefile:850: bpf-next/tools/testing/selftests/bpf/uprobe_multi] Error 1 ...even with updated UAPI headers. It seems the above value is defined in UAPI but including that file triggers other redefinition errors. Simplest solution is to add a guarded definition, as was done for MADV_POPULATE_READ. Fixes: 3c217a182018 ("selftests/bpf: add build ID tests") Signed-off-by: Alan Maguire Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240926144948.172090-1-alan.maguire@oracle.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/uprobe_multi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index c7828b13e5ff..dd38dc68f635 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -12,6 +12,10 @@ #define MADV_POPULATE_READ 22 #endif +#ifndef MADV_PAGEOUT +#define MADV_PAGEOUT 21 +#endif + int __attribute__((weak)) uprobe(void) { return 0; -- 2.51.0 From 48b13cab1e7cb77def27cb89711fb5e3b04db972 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:09 -0700 Subject: [PATCH 06/16] bpf: Allow specifying bpf_fastcall attribute for BPF helpers Allow a new optional 'Attributes' section to be specified for helper functions description, e.g.: * u32 bpf_get_smp_processor_id(void) * ... * Return * ... * Attributes * __bpf_fastcall * Generated header for the example above: #ifndef __bpf_fastcall #if __has_attribute(__bpf_fastcall) #define __bpf_fastcall __attribute__((bpf_fastcall)) #else #define __bpf_fastcall #endif #endif ... __bpf_fastcall static __u32 (* const bpf_get_smp_processor_id)(void) = (void *) 8; The following rules apply: - when present, section must follow 'Return' section; - attribute names are specified on the line following 'Attribute' keyword; - attribute names are separated by spaces; - section ends with an "empty" line (" *\n"). Valid attribute names are recorded in the ATTRS map. ATTRS maps shortcut attribute name to correct C syntax. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- scripts/bpf_doc.py | 53 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index c55878bddfdd..e74a01a85070 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -37,10 +37,11 @@ class APIElement(object): @desc: textual description of the symbol @ret: (optional) description of any associated return value """ - def __init__(self, proto='', desc='', ret=''): + def __init__(self, proto='', desc='', ret='', attrs=[]): self.proto = proto self.desc = desc self.ret = ret + self.attrs = attrs class Helper(APIElement): @@ -81,6 +82,11 @@ class Helper(APIElement): return res +ATTRS = { + '__bpf_fastcall': 'bpf_fastcall' +} + + class HeaderParser(object): """ An object used to parse a file in order to extract the documentation of a @@ -111,7 +117,8 @@ class HeaderParser(object): proto = self.parse_proto() desc = self.parse_desc(proto) ret = self.parse_ret(proto) - return Helper(proto=proto, desc=desc, ret=ret) + attrs = self.parse_attrs(proto) + return Helper(proto=proto, desc=desc, ret=ret, attrs=attrs) def parse_symbol(self): p = re.compile(r' \* ?(BPF\w+)$') @@ -192,6 +199,28 @@ class HeaderParser(object): raise Exception("No return found for " + proto) return ret + def parse_attrs(self, proto): + p = re.compile(r' \* ?(?:\t| {5,8})Attributes$') + capture = p.match(self.line) + if not capture: + return [] + # Expect a single line with mnemonics for attributes separated by spaces + self.line = self.reader.readline() + p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)') + capture = p.match(self.line) + if not capture: + raise Exception("Incomplete 'Attributes' section for " + proto) + attrs = capture.group(1).split(' ') + for attr in attrs: + if attr not in ATTRS: + raise Exception("Unexpected attribute '" + attr + "' specified for " + proto) + self.line = self.reader.readline() + if self.line != ' *\n': + raise Exception("Expecting empty line after 'Attributes' section for " + proto) + # Prepare a line for next self.parse_* to consume + self.line = self.reader.readline() + return attrs + def seek_to(self, target, help_message, discard_lines = 1): self.reader.seek(0) offset = self.reader.read().find(target) @@ -789,6 +818,21 @@ class PrinterHelpers(Printer): print('%s;' % fwd) print('') + used_attrs = set() + for helper in self.elements: + for attr in helper.attrs: + used_attrs.add(attr) + for attr in sorted(used_attrs): + print('#ifndef %s' % attr) + print('#if __has_attribute(%s)' % ATTRS[attr]) + print('#define %s __attribute__((%s))' % (attr, ATTRS[attr])) + print('#else') + print('#define %s' % attr) + print('#endif') + print('#endif') + if used_attrs: + print('') + def print_footer(self): footer = '' print(footer) @@ -827,7 +871,10 @@ class PrinterHelpers(Printer): print(' *{}{}'.format(' \t' if line else '', line)) print(' */') - print('static %s %s(* const %s)(' % (self.map_type(proto['ret_type']), + print('static ', end='') + if helper.attrs: + print('%s ' % (" ".join(helper.attrs)), end='') + print('%s %s(* const %s)(' % (self.map_type(proto['ret_type']), proto['ret_star'], proto['name']), end='') comma = '' for i, a in enumerate(proto['args']): -- 2.51.0 From 4f647a780f3606acbd2116248d51eadb4d865615 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:10 -0700 Subject: [PATCH 07/16] bpf: __bpf_fastcall for bpf_get_smp_processor_id in uapi Since [1] kernel supports __bpf_fastcall attribute for helper function bpf_get_smp_processor_id(). Update uapi definition for this helper in order to have this attribute in the generated bpf_helper_defs.h [1] commit 91b7fbf3936f ("bpf, x86, riscv, arm: no_caller_saved_registers for bpf_get_smp_processor_id()") Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ tools/include/uapi/linux/bpf.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c6cd7c7aeeee..8ab4d8184b9d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..7610883c8191 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1970,6 +1970,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description -- 2.51.0 From da7d71bcb0637b7aa18934628fdb5a55f2db49a6 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:11 -0700 Subject: [PATCH 08/16] bpf: Use KF_FASTCALL to mark kfuncs supporting fastcall contract In order to allow pahole add btf_decl_tag("bpf_fastcall") for kfuncs supporting bpf_fastcall, mark such functions with KF_FASTCALL in id_set8 objects. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-4-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/helpers.c | 4 ++-- kernel/bpf/verifier.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/include/linux/btf.h b/include/linux/btf.h index b8a583194c4a..631060e3ad14 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -75,6 +75,7 @@ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */ +#define KF_FASTCALL (1 << 12) /* kfunc supports bpf_fastcall protocol */ /* * Tag marking a kernel function as a kfunc. This is meant to minimize the diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1a43d06eab28..4053f279ed4c 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3052,8 +3052,8 @@ BTF_ID(func, bpf_cgroup_release_dtor) #endif BTF_KFUNCS_START(common_btf_ids) -BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx) -BTF_ID_FLAGS(func, bpf_rdonly_cast) +BTF_ID_FLAGS(func, bpf_cast_to_kern_ctx, KF_FASTCALL) +BTF_ID_FLAGS(func, bpf_rdonly_cast, KF_FASTCALL) BTF_ID_FLAGS(func, bpf_rcu_read_lock) BTF_ID_FLAGS(func, bpf_rcu_read_unlock) BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9a7ed527e47e..7d9b38ffd220 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16176,10 +16176,7 @@ static u32 kfunc_fastcall_clobber_mask(struct bpf_kfunc_call_arg_meta *meta) /* Same as verifier_inlines_helper_call() but for kfuncs, see comment above */ static bool is_fastcall_kfunc_call(struct bpf_kfunc_call_arg_meta *meta) { - if (meta->btf == btf_vmlinux) - return meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] || - meta->func_id == special_kfunc_list[KF_bpf_rdonly_cast]; - return false; + return meta->kfunc_flags & KF_FASTCALL; } /* LLVM define a bpf_fastcall function attribute. -- 2.51.0 From bf7ce5416f68db058ac7105902adf497b3ce4e8c Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Mon, 16 Sep 2024 02:17:12 -0700 Subject: [PATCH 09/16] bpftool: __bpf_fastcall for kfuncs marked with special decl_tag Generate __attribute__((bpf_fastcall)) for kfuncs marked with "bpf_fastcall" decl tag. E.g. for the following BTF: $ bpftool btf dump file vmlinux ... [A] FUNC 'bpf_rdonly_cast' type_id=... ... [B] DECL_TAG 'bpf_kfunc' type_id=A component_idx=-1 [C] DECL_TAG 'bpf_fastcall' type_id=A component_idx=-1 Generate the following vmlinux.h: #ifndef __VMLINUX_H__ #define __VMLINUX_H__ ... #ifndef __bpf_fastcall #if __has_attribute(bpf_fastcall) #define __bpf_fastcall __attribute__((bpf_fastcall)) #else #define __bpf_fastcall #endif #endif ... __bpf_fastcall extern void *bpf_rdonly_cast(...) ...; The "bpf_fastcall" / "bpf_kfunc" tags pair would generated by pahole when constructing vmlinux BTF. While at it, sort printed kfuncs by name for better vmlinux.h stability. Signed-off-by: Eduard Zingerman Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240916091712.2929279-5-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/btf.c | 98 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 88 insertions(+), 10 deletions(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7d2af1ff3c8d..547c1ccdce98 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include #include #include #include #include +#include #include #include #include @@ -21,6 +25,7 @@ #include "main.h" #define KFUNC_DECL_TAG "bpf_kfunc" +#define FASTCALL_DECL_TAG "bpf_fastcall" static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", @@ -464,19 +469,59 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +struct ptr_array { + __u32 cnt; + __u32 cap; + const void **elems; +}; + +static int ptr_array_push(const void *ptr, struct ptr_array *arr) +{ + __u32 new_cap; + void *tmp; + + if (arr->cnt == arr->cap) { + new_cap = (arr->cap ?: 16) * 2; + tmp = realloc(arr->elems, sizeof(*arr->elems) * new_cap); + if (!tmp) + return -ENOMEM; + arr->elems = tmp; + arr->cap = new_cap; + } + arr->elems[arr->cnt++] = ptr; + return 0; +} + +static void ptr_array_free(struct ptr_array *arr) +{ + free(arr->elems); +} + +static int cmp_kfuncs(const void *pa, const void *pb, void *ctx) +{ + struct btf *btf = ctx; + const struct btf_type *a = *(void **)pa; + const struct btf_type *b = *(void **)pb; + + return strcmp(btf__str_by_offset(btf, a->name_off), + btf__str_by_offset(btf, b->name_off)); +} + static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) { LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); - int cnt = btf__type_cnt(btf); - int i; + __u32 cnt = btf__type_cnt(btf), i, j; + struct ptr_array fastcalls = {}; + struct ptr_array kfuncs = {}; + int err = 0; printf("\n/* BPF kfuncs */\n"); printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); for (i = 1; i < cnt; i++) { const struct btf_type *t = btf__type_by_id(btf, i); + const struct btf_type *ft; const char *name; - int err; if (!btf_is_decl_tag(t)) continue; @@ -484,27 +529,53 @@ static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) if (btf_decl_tag(t)->component_idx != -1) continue; - name = btf__name_by_offset(btf, t->name_off); - if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + ft = btf__type_by_id(btf, t->type); + if (!btf_is_func(ft)) continue; - t = btf__type_by_id(btf, t->type); - if (!btf_is_func(t)) - continue; + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &kfuncs); + if (err) + goto out; + } + + if (strncmp(name, FASTCALL_DECL_TAG, sizeof(FASTCALL_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &fastcalls); + if (err) + goto out; + } + } + + /* Sort kfuncs by name for improved vmlinux.h stability */ + qsort_r(kfuncs.elems, kfuncs.cnt, sizeof(*kfuncs.elems), cmp_kfuncs, (void *)btf); + for (i = 0; i < kfuncs.cnt; i++) { + const struct btf_type *t = kfuncs.elems[i]; printf("extern "); + /* Assume small amount of fastcall kfuncs */ + for (j = 0; j < fastcalls.cnt; j++) { + if (fastcalls.elems[j] == t) { + printf("__bpf_fastcall "); + break; + } + } + opts.field_name = btf__name_by_offset(btf, t->name_off); err = btf_dump__emit_type_decl(d, t->type, &opts); if (err) - return err; + goto out; printf(" __weak __ksym;\n"); } printf("#endif\n\n"); - return 0; +out: + ptr_array_free(&fastcalls); + ptr_array_free(&kfuncs); + return err; } static void __printf(2, 0) btf_dump_printf(void *ctx, @@ -718,6 +789,13 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef __weak\n"); printf("#define __weak __attribute__((weak))\n"); printf("#endif\n\n"); + printf("#ifndef __bpf_fastcall\n"); + printf("#if __has_attribute(bpf_fastcall)\n"); + printf("#define __bpf_fastcall __attribute__((bpf_fastcall))\n"); + printf("#else\n"); + printf("#define __bpf_fastcall\n"); + printf("#endif\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { -- 2.51.0 From a5da3d65681f86f582420b5aea49c1d9a7c7e51e Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 1 Oct 2024 00:15:22 +0100 Subject: [PATCH 10/16] selftests/bpf: Emit top frequent code lines in veristat Production BPF programs are increasing in number of instructions and states to the point, where optimising verification process for them is necessary to avoid running into instruction limit. Authors of those BPF programs need to analyze verifier output, for example, collecting the most frequent source code lines to understand which part of the program has the biggest verification cost. This patch introduces `--top-src-lines` flag in veristat. `--top-src-lines=N` makes veristat output N the most popular sorce code lines, parsed from verification log. An example of output: ``` sudo ./veristat --top-src-lines=2 bpf_flow.bpf.o Processing 'bpf_flow.bpf.o'... Top source lines (_dissect): 4: (bpf_helpers.h:161) asm volatile("r1 = %[ctx]\n\t" 4: (bpf_flow.c:155) if (iph && iph->ihl == 5 && ... ``` Signed-off-by: Mykyta Yatsenko Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240930231522.58650-1-mykyta.yatsenko5@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/veristat.c | 129 ++++++++++++++++++++++++- 1 file changed, 128 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1ec5c4c47235..c8efd44590d9 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -179,6 +179,7 @@ static struct env { int files_skipped; int progs_processed; int progs_skipped; + int top_src_lines; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -228,6 +229,7 @@ static const struct argp_option opts[] = { "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, + { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, {}, }; @@ -327,6 +329,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case 'S': + errno = 0; + env.top_src_lines = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top lines N specifier: %s\n", arg); + argp_usage(state); + } + break; case ARGP_KEY_ARG: tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); if (!tmp) @@ -854,6 +864,118 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +struct line_cnt { + char *line; + int cnt; +}; + +static int str_cmp(const void *a, const void *b) +{ + const char **str1 = (const char **)a; + const char **str2 = (const char **)b; + + return strcmp(*str1, *str2); +} + +static int line_cnt_cmp(const void *a, const void *b) +{ + const struct line_cnt *a_cnt = (const struct line_cnt *)a; + const struct line_cnt *b_cnt = (const struct line_cnt *)b; + + if (a_cnt->cnt != b_cnt->cnt) + return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return strcmp(a_cnt->line, b_cnt->line); +} + +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name) +{ + int lines_cap = 0; + int lines_size = 0; + char **lines = NULL; + char *line = NULL; + char *state; + struct line_cnt *freq = NULL; + struct line_cnt *cur; + int unique_lines; + int err = 0; + int i; + + while ((line = strtok_r(line ? NULL : buf, "\n", &state))) { + if (strncmp(line, "; ", 2) != 0) + continue; + line += 2; + + if (lines_size == lines_cap) { + char **tmp; + + lines_cap = max(16, lines_cap * 2); + tmp = realloc(lines, lines_cap * sizeof(*tmp)); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + lines = tmp; + } + lines[lines_size] = line; + lines_size++; + } + + if (lines_size == 0) + goto cleanup; + + qsort(lines, lines_size, sizeof(*lines), str_cmp); + + freq = calloc(lines_size, sizeof(*freq)); + if (!freq) { + err = -ENOMEM; + goto cleanup; + } + + cur = freq; + cur->line = lines[0]; + cur->cnt = 1; + for (i = 1; i < lines_size; ++i) { + if (strcmp(lines[i], cur->line) != 0) { + cur++; + cur->line = lines[i]; + cur->cnt = 0; + } + cur->cnt++; + } + unique_lines = cur - freq + 1; + + qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp); + + printf("Top source lines (%s):\n", prog_name); + for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) { + const char *src_code = freq[i].line; + const char *src_line = NULL; + char *split = strrchr(freq[i].line, '@'); + + if (split) { + src_line = split + 1; + + while (*src_line && isspace(*src_line)) + src_line++; + + while (split > src_code && isspace(*split)) + split--; + *split = '\0'; + } + + if (src_line) + printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code); + else + printf("%5d: %s\n", freq[i].cnt, src_code); + } + printf("\n"); + +cleanup: + free(freq); + free(lines); + return err; +} + static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_prog_type *prog_type, enum bpf_attach_type *attach_type) @@ -1009,13 +1131,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats = &env.prog_stats[env.prog_stat_cnt++]; memset(stats, 0, sizeof(*stats)); - if (env.verbose) { + if (env.verbose || env.top_src_lines > 0) { buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; buf = malloc(buf_sz); if (!buf) return -ENOMEM; /* ensure we always request stats */ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); + /* --top-src-lines needs verifier log */ + if (env.top_src_lines > 0 && env.log_level == 0) + log_level |= 2; } else { buf = verif_log_buf; buf_sz = sizeof(verif_log_buf); @@ -1048,6 +1173,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf filename, prog_name, stats->stats[DURATION], err ? "failure" : "success", buf); } + if (env.top_src_lines > 0) + print_top_src_lines(buf, buf_sz, stats->prog_name); if (verif_log_buf != buf) free(buf); -- 2.51.0 From 710fbca820c721cdd60fa8c5bbe9deb4c0788aae Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 27 Sep 2024 15:13:52 +0200 Subject: [PATCH 11/16] libbpf: Add missing per-arch include path MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit libbpf does not include the per-arch tools include path, e.g. tools/arch/riscv/include. Some architectures depend those files to build properly. Include tools/arch/$(SUBARCH)/include in the libbpf build. Fixes: 6d74d178fe6e ("tools: Add riscv barrier implementation") Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240927131355.350918-1-bjorn@kernel.org --- tools/lib/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b22f0f37288..857a5f7b413d 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -61,7 +61,8 @@ ifndef VERBOSE endif INCLUDES = -I$(or $(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj -- 2.51.0 From 19090f0306f1748980596c6c71f1c4b128639cff Mon Sep 17 00:00:00 2001 From: =?utf8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Fri, 27 Sep 2024 15:13:53 +0200 Subject: [PATCH 12/16] selftests: bpf: Add missing per-arch include path MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The prog_tests programs do not include the per-arch tools include path, e.g. tools/arch/riscv/include. Some architectures depend those files to build properly. Include tools/arch/$(SUBARCH)/include in the selftests bpf build. Fixes: 6d74d178fe6e ("tools: Add riscv barrier implementation") Signed-off-by: Björn Töpel Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240927131355.350918-2-bjorn@kernel.org --- tools/testing/selftests/bpf/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e295e3df5ec6..28a76baa854d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,6 +10,7 @@ TOOLSDIR := $(abspath ../../..) LIBDIR := $(TOOLSDIR)/lib BPFDIR := $(LIBDIR)/bpf TOOLSINCDIR := $(TOOLSDIR)/include +TOOLSARCHINCDIR := $(TOOLSDIR)/arch/$(SRCARCH)/include BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool APIDIR := $(TOOLSINCDIR)/uapi ifneq ($(O),) @@ -44,7 +45,7 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror -fno-omit-frame-pointer \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) + -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread -- 2.51.0 From 4236f114a3ffbbfd217436c08852e94cae372f57 Mon Sep 17 00:00:00 2001 From: Yuan Chen Date: Mon, 30 Sep 2024 10:41:15 +0800 Subject: [PATCH 13/16] bpf: Fix the xdp_adjust_tail sample prog issue During the xdp_adjust_tail test, probabilistic failure occurs and SKB package is discarded by the kernel. After checking the issues by tracking SKB package, it is identified that they were caused by checksum errors. Refer to checksum of the arch/arm64/include/asm/checksum.h for fixing. v2: Based on Alexei Starovoitov's suggestions, it is necessary to keep the code implementation consistent. Fixes: c6ffd1ff7856 (bpf: add bpf_xdp_adjust_tail sample prog) Signed-off-by: Yuan Chen Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20240930024115.52841-1-chenyuan_fl@163.com --- samples/bpf/xdp_adjust_tail_kern.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/bpf/xdp_adjust_tail_kern.c b/samples/bpf/xdp_adjust_tail_kern.c index ffdd548627f0..da67bcad1c63 100644 --- a/samples/bpf/xdp_adjust_tail_kern.c +++ b/samples/bpf/xdp_adjust_tail_kern.c @@ -57,6 +57,7 @@ static __always_inline void swap_mac(void *data, struct ethhdr *orig_eth) static __always_inline __u16 csum_fold_helper(__u32 csum) { + csum = (csum & 0xffff) + (csum >> 16); return ~((csum & 0xffff) + (csum >> 16)); } -- 2.51.0 From c50fc1cbfd71dcb1d70fd593b2af7c92af465921 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Wed, 2 Oct 2024 07:32:42 +0800 Subject: [PATCH 14/16] bpf: syscall_nrs: Disable no previous prototype warnning MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit In some environments (gcc treated as error in W=1, which is default), if we make -C samples/bpf/, it will be stopped because of "no previous prototype" error like this: ../samples/bpf/syscall_nrs.c:7:6: error: no previous prototype for ‘syscall_defines’ [-Werror=missing-prototypes] void syscall_defines(void) ^~~~~~~~~~~~~~~ Actually, this file meets our expectatations because it will be converted to a .h file. In this way, it's correct. Considering the warnning stopping us compiling, we can remove the warnning directly. Signed-off-by: Jason Xing Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/all/20241001012540.39007-1-kerneljasonxing@gmail.com/ Link: https://lore.kernel.org/all/CAEf4BzaVdr_0kQo=+jPLN++PvcU6pwTjaPVEA880kgDN94TZYw@mail.gmail.com/ Link: https://lore.kernel.org/bpf/20241001233242.98679-1-kerneljasonxing@gmail.com --- samples/bpf/syscall_nrs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/bpf/syscall_nrs.c b/samples/bpf/syscall_nrs.c index 88f940052450..a6e600f3d477 100644 --- a/samples/bpf/syscall_nrs.c +++ b/samples/bpf/syscall_nrs.c @@ -2,6 +2,9 @@ #include #include +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-prototypes" + #define SYSNR(_NR) DEFINE(SYS ## _NR, _NR) void syscall_defines(void) @@ -17,3 +20,5 @@ void syscall_defines(void) #endif } + +#pragma GCC diagnostic pop -- 2.51.0 From 4b146e95da87bf0fe64502aaafebeb622dfff653 Mon Sep 17 00:00:00 2001 From: Eric Long Date: Wed, 2 Oct 2024 14:25:06 +0800 Subject: [PATCH 15/16] libbpf: Do not resolve size on duplicate FUNCs FUNCs do not have sizes, thus currently btf__resolve_size will fail with -EINVAL. Add conditions so that we only update size when the BTF object is not function or function prototype. Signed-off-by: Eric Long Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241002-libbpf-dup-extern-funcs-v4-1-560eb460ff90@hack3r.moe --- tools/lib/bpf/linker.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 81dbbdd79a7c..f83c1c29982c 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -2451,6 +2451,10 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (glob_sym && glob_sym->var_idx >= 0) { __s64 sz; + /* FUNCs don't have size, nothing to update */ + if (btf_is_func(t)) + continue; + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; /* Because underlying BTF type might have * changed, so might its size have changed, so -- 2.51.0 From 3c591de2854381e313ec149bc1bbd8360f9ed53b Mon Sep 17 00:00:00 2001 From: Eric Long Date: Wed, 2 Oct 2024 14:25:07 +0800 Subject: [PATCH 16/16] selftests/bpf: Test linking with duplicate extern functions Previously when multiple BPF object files referencing the same extern function (usually kfunc) are statically linked using `bpftool gen object`, libbpf tries to get the nonexistent size of BTF_KIND_FUNC_PROTO and fails. This test ensures it is fixed. Signed-off-by: Eric Long Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241002-libbpf-dup-extern-funcs-v4-2-560eb460ff90@hack3r.moe --- tools/testing/selftests/bpf/progs/linked_funcs1.c | 8 ++++++++ tools/testing/selftests/bpf/progs/linked_funcs2.c | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index cc79dddac182..049a1f78de3f 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -63,6 +63,8 @@ extern int set_output_val2(int x); /* here we'll force set_output_ctx2() to be __hidden in the final obj file */ __hidden extern void set_output_ctx2(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler1, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen1(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 942cc5526ddf..96850759fd8d 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -63,6 +63,8 @@ extern int set_output_val1(int x); /* here we'll force set_output_ctx1() to be __hidden in the final obj file */ __hidden extern void set_output_ctx1(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler2, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen2(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; -- 2.51.0