[BTF_KIND_FLOAT]        = "FLOAT",
 };
 
-static const char *btf_type_str(const struct btf_type *t)
+const char *btf_type_str(const struct btf_type *t)
 {
        return btf_kind_str[BTF_INFO_KIND(t->info)];
 }
        return btf_check_func_type_match(log, btf1, t1, btf2, t2);
 }
 
+static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
+#ifdef CONFIG_NET
+       [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+       [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+       [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
+#endif
+};
+
 static int btf_check_func_arg_match(struct bpf_verifier_env *env,
                                    const struct btf *btf, u32 func_id,
                                    struct bpf_reg_state *regs,
        const char *func_name, *ref_tname;
        const struct btf_type *t, *ref_t;
        const struct btf_param *args;
-       u32 i, nargs;
+       u32 i, nargs, ref_id;
 
        t = btf_type_by_id(btf, func_id);
        if (!t || !btf_type_is_func(t)) {
                /* These checks were already done by the verifier while loading
-                * struct bpf_func_info
+                * struct bpf_func_info or in add_kfunc_call().
                 */
                bpf_log(log, "BTF of func_id %u doesn't point to KIND_FUNC\n",
                        func_id);
                        return -EINVAL;
                }
 
-               ref_t = btf_type_skip_modifiers(btf, t->type, NULL);
+               ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
                ref_tname = btf_name_by_offset(btf, ref_t->name_off);
-               if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) {
+               if (btf_is_kernel(btf)) {
+                       const struct btf_type *reg_ref_t;
+                       const struct btf *reg_btf;
+                       const char *reg_ref_tname;
+                       u32 reg_ref_id;
+
+                       if (!btf_type_is_struct(ref_t)) {
+                               bpf_log(log, "kernel function %s args#%d pointer type %s %s is not supported\n",
+                                       func_name, i, btf_type_str(ref_t),
+                                       ref_tname);
+                               return -EINVAL;
+                       }
+
+                       if (reg->type == PTR_TO_BTF_ID) {
+                               reg_btf = reg->btf;
+                               reg_ref_id = reg->btf_id;
+                       } else if (reg2btf_ids[reg->type]) {
+                               reg_btf = btf_vmlinux;
+                               reg_ref_id = *reg2btf_ids[reg->type];
+                       } else {
+                               bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d is not a pointer to btf_id\n",
+                                       func_name, i,
+                                       btf_type_str(ref_t), ref_tname, regno);
+                               return -EINVAL;
+                       }
+
+                       reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id,
+                                                           ®_ref_id);
+                       reg_ref_tname = btf_name_by_offset(reg_btf,
+                                                          reg_ref_t->name_off);
+                       if (!btf_struct_ids_match(log, reg_btf, reg_ref_id,
+                                                 reg->off, btf, ref_id)) {
+                               bpf_log(log, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
+                                       func_name, i,
+                                       btf_type_str(ref_t), ref_tname,
+                                       regno, btf_type_str(reg_ref_t),
+                                       reg_ref_tname);
+                               return -EINVAL;
+                       }
+               } else if (btf_get_prog_ctx_type(log, btf, t,
+                                                env->prog->type, i)) {
                        /* If function expects ctx type in BTF check that caller
                         * is passing PTR_TO_CTX.
                         */
        return err;
 }
 
+int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
+                             const struct btf *btf, u32 func_id,
+                             struct bpf_reg_state *regs)
+{
+       return btf_check_func_arg_match(env, btf, func_id, regs, false);
+}
+
 /* Convert BTF of a function into bpf_reg_state if possible
  * Returns:
  * EFAULT - there is a verifier bug. Abort verification.
 
               insn->src_reg == BPF_PSEUDO_CALL;
 }
 
+static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
+{
+       return insn->code == (BPF_JMP | BPF_CALL) &&
+              insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
+}
+
 static bool bpf_pseudo_func(const struct bpf_insn *insn)
 {
        return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
                verbose(env, "too many subprograms\n");
                return -E2BIG;
        }
+       /* determine subprog starts. The end is one before the next starts */
        env->subprog_info[env->subprog_cnt++].start = off;
        sort(env->subprog_info, env->subprog_cnt,
             sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
        return env->subprog_cnt - 1;
 }
 
-static int check_subprogs(struct bpf_verifier_env *env)
+struct bpf_kfunc_desc {
+       struct btf_func_model func_model;
+       u32 func_id;
+       s32 imm;
+};
+
+#define MAX_KFUNC_DESCS 256
+struct bpf_kfunc_desc_tab {
+       struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
+       u32 nr_descs;
+};
+
+static int kfunc_desc_cmp_by_id(const void *a, const void *b)
+{
+       const struct bpf_kfunc_desc *d0 = a;
+       const struct bpf_kfunc_desc *d1 = b;
+
+       /* func_id is not greater than BTF_MAX_TYPE */
+       return d0->func_id - d1->func_id;
+}
+
+static const struct bpf_kfunc_desc *
+find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
+{
+       struct bpf_kfunc_desc desc = {
+               .func_id = func_id,
+       };
+       struct bpf_kfunc_desc_tab *tab;
+
+       tab = prog->aux->kfunc_tab;
+       return bsearch(&desc, tab->descs, tab->nr_descs,
+                      sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
+}
+
+static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
+{
+       const struct btf_type *func, *func_proto;
+       struct bpf_kfunc_desc_tab *tab;
+       struct bpf_prog_aux *prog_aux;
+       struct bpf_kfunc_desc *desc;
+       const char *func_name;
+       unsigned long addr;
+       int err;
+
+       prog_aux = env->prog->aux;
+       tab = prog_aux->kfunc_tab;
+       if (!tab) {
+               if (!btf_vmlinux) {
+                       verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
+                       return -ENOTSUPP;
+               }
+
+               if (!env->prog->jit_requested) {
+                       verbose(env, "JIT is required for calling kernel function\n");
+                       return -ENOTSUPP;
+               }
+
+               if (!bpf_jit_supports_kfunc_call()) {
+                       verbose(env, "JIT does not support calling kernel function\n");
+                       return -ENOTSUPP;
+               }
+
+               if (!env->prog->gpl_compatible) {
+                       verbose(env, "cannot call kernel function from non-GPL compatible program\n");
+                       return -EINVAL;
+               }
+
+               tab = kzalloc(sizeof(*tab), GFP_KERNEL);
+               if (!tab)
+                       return -ENOMEM;
+               prog_aux->kfunc_tab = tab;
+       }
+
+       if (find_kfunc_desc(env->prog, func_id))
+               return 0;
+
+       if (tab->nr_descs == MAX_KFUNC_DESCS) {
+               verbose(env, "too many different kernel function calls\n");
+               return -E2BIG;
+       }
+
+       func = btf_type_by_id(btf_vmlinux, func_id);
+       if (!func || !btf_type_is_func(func)) {
+               verbose(env, "kernel btf_id %u is not a function\n",
+                       func_id);
+               return -EINVAL;
+       }
+       func_proto = btf_type_by_id(btf_vmlinux, func->type);
+       if (!func_proto || !btf_type_is_func_proto(func_proto)) {
+               verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
+                       func_id);
+               return -EINVAL;
+       }
+
+       func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+       addr = kallsyms_lookup_name(func_name);
+       if (!addr) {
+               verbose(env, "cannot find address for kernel function %s\n",
+                       func_name);
+               return -EINVAL;
+       }
+
+       desc = &tab->descs[tab->nr_descs++];
+       desc->func_id = func_id;
+       desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
+       err = btf_distill_func_proto(&env->log, btf_vmlinux,
+                                    func_proto, func_name,
+                                    &desc->func_model);
+       if (!err)
+               sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+                    kfunc_desc_cmp_by_id, NULL);
+       return err;
+}
+
+static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
+{
+       const struct bpf_kfunc_desc *d0 = a;
+       const struct bpf_kfunc_desc *d1 = b;
+
+       if (d0->imm > d1->imm)
+               return 1;
+       else if (d0->imm < d1->imm)
+               return -1;
+       return 0;
+}
+
+static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
+{
+       struct bpf_kfunc_desc_tab *tab;
+
+       tab = prog->aux->kfunc_tab;
+       if (!tab)
+               return;
+
+       sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
+            kfunc_desc_cmp_by_imm, NULL);
+}
+
+bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
+{
+       return !!prog->aux->kfunc_tab;
+}
+
+const struct btf_func_model *
+bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
+                        const struct bpf_insn *insn)
+{
+       const struct bpf_kfunc_desc desc = {
+               .imm = insn->imm,
+       };
+       const struct bpf_kfunc_desc *res;
+       struct bpf_kfunc_desc_tab *tab;
+
+       tab = prog->aux->kfunc_tab;
+       res = bsearch(&desc, tab->descs, tab->nr_descs,
+                     sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
+
+       return res ? &res->func_model : NULL;
+}
+
+static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 {
-       int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
        struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn *insn = env->prog->insnsi;
-       int insn_cnt = env->prog->len;
+       int i, ret, insn_cnt = env->prog->len;
 
        /* Add entry function. */
        ret = add_subprog(env, 0);
-       if (ret < 0)
+       if (ret)
                return ret;
 
-       /* determine subprog starts. The end is one before the next starts */
-       for (i = 0; i < insn_cnt; i++) {
-               if (bpf_pseudo_func(insn + i)) {
-                       if (!env->bpf_capable) {
-                               verbose(env,
-                                       "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
-                               return -EPERM;
-                       }
-                       ret = add_subprog(env, i + insn[i].imm + 1);
-                       if (ret < 0)
-                               return ret;
-                       /* remember subprog */
-                       insn[i + 1].imm = ret;
-                       continue;
-               }
-               if (!bpf_pseudo_call(insn + i))
+       for (i = 0; i < insn_cnt; i++, insn++) {
+               if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
+                   !bpf_pseudo_kfunc_call(insn))
                        continue;
+
                if (!env->bpf_capable) {
-                       verbose(env,
-                               "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
+                       verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
                        return -EPERM;
                }
-               ret = add_subprog(env, i + insn[i].imm + 1);
+
+               if (bpf_pseudo_func(insn)) {
+                       ret = add_subprog(env, i + insn->imm + 1);
+                       if (ret >= 0)
+                               /* remember subprog */
+                               insn[1].imm = ret;
+               } else if (bpf_pseudo_call(insn)) {
+                       ret = add_subprog(env, i + insn->imm + 1);
+               } else {
+                       ret = add_kfunc_call(env, insn->imm);
+               }
+
                if (ret < 0)
                        return ret;
        }
                for (i = 0; i < env->subprog_cnt; i++)
                        verbose(env, "func#%d @%d\n", i, subprog[i].start);
 
+       return 0;
+}
+
+static int check_subprogs(struct bpf_verifier_env *env)
+{
+       int i, subprog_start, subprog_end, off, cur_subprog = 0;
+       struct bpf_subprog_info *subprog = env->subprog_info;
+       struct bpf_insn *insn = env->prog->insnsi;
+       int insn_cnt = env->prog->len;
+
        /* now check that all jumps are within the same subprog */
        subprog_start = subprog[cur_subprog].start;
        subprog_end = subprog[cur_subprog + 1].start;
        return i;
 }
 
+static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
+{
+       const struct btf_type *func;
+
+       if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
+               return NULL;
+
+       func = btf_type_by_id(btf_vmlinux, insn->imm);
+       return btf_name_by_offset(btf_vmlinux, func->name_off);
+}
+
 /* For given verifier state backtrack_insn() is called from the last insn to
  * the first insn. Its purpose is to compute a bitmask of registers and
  * stack slots that needs precision in the parent verifier state.
                          u32 *reg_mask, u64 *stack_mask)
 {
        const struct bpf_insn_cbs cbs = {
+               .cb_call        = disasm_kfunc_name,
                .cb_print       = verbose,
                .private_data   = env,
        };
        return 0;
 }
 
+/* mark_btf_func_reg_size() is used when the reg size is determined by
+ * the BTF func_proto's return value size and argument.
+ */
+static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
+                                  size_t reg_size)
+{
+       struct bpf_reg_state *reg = &cur_regs(env)[regno];
+
+       if (regno == BPF_REG_0) {
+               /* Function return value */
+               reg->live |= REG_LIVE_WRITTEN;
+               reg->subreg_def = reg_size == sizeof(u64) ?
+                       DEF_NOT_SUBREG : env->insn_idx + 1;
+       } else {
+               /* Function argument */
+               if (reg_size == sizeof(u64)) {
+                       mark_insn_zext(env, reg);
+                       mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
+               } else {
+                       mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
+               }
+       }
+}
+
+static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
+{
+       const struct btf_type *t, *func, *func_proto, *ptr_type;
+       struct bpf_reg_state *regs = cur_regs(env);
+       const char *func_name, *ptr_type_name;
+       u32 i, nargs, func_id, ptr_type_id;
+       const struct btf_param *args;
+       int err;
+
+       func_id = insn->imm;
+       func = btf_type_by_id(btf_vmlinux, func_id);
+       func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
+       func_proto = btf_type_by_id(btf_vmlinux, func->type);
+
+       if (!env->ops->check_kfunc_call ||
+           !env->ops->check_kfunc_call(func_id)) {
+               verbose(env, "calling kernel function %s is not allowed\n",
+                       func_name);
+               return -EACCES;
+       }
+
+       /* Check the arguments */
+       err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
+       if (err)
+               return err;
+
+       for (i = 0; i < CALLER_SAVED_REGS; i++)
+               mark_reg_not_init(env, regs, caller_saved[i]);
+
+       /* Check return type */
+       t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
+       if (btf_type_is_scalar(t)) {
+               mark_reg_unknown(env, regs, BPF_REG_0);
+               mark_btf_func_reg_size(env, BPF_REG_0, t->size);
+       } else if (btf_type_is_ptr(t)) {
+               ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
+                                                  &ptr_type_id);
+               if (!btf_type_is_struct(ptr_type)) {
+                       ptr_type_name = btf_name_by_offset(btf_vmlinux,
+                                                          ptr_type->name_off);
+                       verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
+                               func_name, btf_type_str(ptr_type),
+                               ptr_type_name);
+                       return -EINVAL;
+               }
+               mark_reg_known_zero(env, regs, BPF_REG_0);
+               regs[BPF_REG_0].btf = btf_vmlinux;
+               regs[BPF_REG_0].type = PTR_TO_BTF_ID;
+               regs[BPF_REG_0].btf_id = ptr_type_id;
+               mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
+       } /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
+
+       nargs = btf_type_vlen(func_proto);
+       args = (const struct btf_param *)(func_proto + 1);
+       for (i = 0; i < nargs; i++) {
+               u32 regno = i + 1;
+
+               t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
+               if (btf_type_is_ptr(t))
+                       mark_btf_func_reg_size(env, regno, sizeof(void *));
+               else
+                       /* scalar. ensured by btf_check_kfunc_arg_match() */
+                       mark_btf_func_reg_size(env, regno, t->size);
+       }
+
+       return 0;
+}
+
 static bool signed_add_overflows(s64 a, s64 b)
 {
        /* Do the add in u64, where overflow is well-defined */
 
                if (env->log.level & BPF_LOG_LEVEL) {
                        const struct bpf_insn_cbs cbs = {
+                               .cb_call        = disasm_kfunc_name,
                                .cb_print       = verbose,
                                .private_data   = env,
                        };
                                if (BPF_SRC(insn->code) != BPF_K ||
                                    insn->off != 0 ||
                                    (insn->src_reg != BPF_REG_0 &&
-                                    insn->src_reg != BPF_PSEUDO_CALL) ||
+                                    insn->src_reg != BPF_PSEUDO_CALL &&
+                                    insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
                                    insn->dst_reg != BPF_REG_0 ||
                                    class == BPF_JMP32) {
                                        verbose(env, "BPF_CALL uses reserved fields\n");
                                }
                                if (insn->src_reg == BPF_PSEUDO_CALL)
                                        err = check_func_call(env, insn, &env->insn_idx);
+                               else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
+                                       err = check_kfunc_call(env, insn);
                                else
                                        err = check_helper_call(env, insn, &env->insn_idx);
                                if (err)
                func[i]->aux->name[0] = 'F';
                func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
                func[i]->jit_requested = 1;
+               func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
                func[i]->aux->linfo = prog->aux->linfo;
                func[i]->aux->nr_linfo = prog->aux->nr_linfo;
                func[i]->aux->jited_linfo = prog->aux->jited_linfo;
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
        struct bpf_prog *prog = env->prog;
        struct bpf_insn *insn = prog->insnsi;
+       bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
        int i, depth;
 #endif
        int err = 0;
                        return err;
        }
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
+       if (has_kfunc_call) {
+               verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
+               return -EINVAL;
+       }
        if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
                /* When JIT fails the progs with bpf2bpf calls and tail_calls
                 * have to be rejected, since interpreter doesn't support them yet.
        return err;
 }
 
+static int fixup_kfunc_call(struct bpf_verifier_env *env,
+                           struct bpf_insn *insn)
+{
+       const struct bpf_kfunc_desc *desc;
+
+       /* insn->imm has the btf func_id. Replace it with
+        * an address (relative to __bpf_base_call).
+        */
+       desc = find_kfunc_desc(env->prog, insn->imm);
+       if (!desc) {
+               verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
+                       insn->imm);
+               return -EFAULT;
+       }
+
+       insn->imm = desc->imm;
+
+       return 0;
+}
+
 /* Do various post-verification rewrites in a single program pass.
  * These rewrites simplify JIT and interpreter implementations.
  */
                        continue;
                if (insn->src_reg == BPF_PSEUDO_CALL)
                        continue;
+               if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
+                       ret = fixup_kfunc_call(env, insn);
+                       if (ret)
+                               return ret;
+                       continue;
+               }
 
                if (insn->imm == BPF_FUNC_get_route_realm)
                        prog->dst_needed = 1;
                }
        }
 
+       sort_kfunc_descs_by_imm(env->prog);
+
        return 0;
 }
 
        if (!env->explored_states)
                goto skip_full_check;
 
+       ret = add_subprog_and_kfunc(env);
+       if (ret < 0)
+               goto skip_full_check;
+
        ret = check_subprogs(env);
        if (ret < 0)
                goto skip_full_check;