return proglen;
 }
 
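+/* JIT state preserved across bpf_int_jit_compile() invocations via
+ * prog->aux->jit_data, so that the extra pass for bpf-to-bpf calls can
+ * resume with the image and insn offsets computed earlier.
+ */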
+struct x64_jit_data {
+       struct bpf_binary_header *header;
+       int *addrs;
+       u8 *image;
+       int proglen;
+       struct jit_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
        struct bpf_binary_header *header = NULL;
        struct bpf_prog *tmp, *orig_prog = prog;
+       struct x64_jit_data *jit_data;
        int proglen, oldproglen = 0;
        struct jit_context ctx = {};
        bool tmp_blinded = false;
+       bool extra_pass = false;
        u8 *image = NULL;
        int *addrs;
        int pass;
                prog = tmp;
        }
 
+       jit_data = prog->aux->jit_data;
+       if (!jit_data) {
+               jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+               if (!jit_data) {
+                       prog = orig_prog;
+                       goto out;
+               }
+               prog->aux->jit_data = jit_data;
+       }
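+       /* addrs left over from a previous invocation mean this is the
+        * extra pass: restore the saved context and skip the per-insn
+        * length estimation below.
+        */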
+       addrs = jit_data->addrs;
+       if (addrs) {
+               ctx = jit_data->ctx;
+               oldproglen = jit_data->proglen;
+               image = jit_data->image;
+               header = jit_data->header;
+               extra_pass = true;
+               goto skip_init_addrs;
+       }
        addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
        if (!addrs) {
                prog = orig_prog;
-               goto out;
+               goto out_addrs;
        }
 
        /* Before first pass, make a rough estimation of addrs[]
                addrs[i] = proglen;
        }
        ctx.cleanup_addr = proglen;
+skip_init_addrs:
 
        /* JITed image shrinks with every pass and the loop iterates
         * until the image stops shrinking. Very large bpf programs
 
        if (image) {
                bpf_flush_icache(header, image + proglen);
-               bpf_jit_binary_lock_ro(header);
+               if (!prog->is_func || extra_pass) {
+                       bpf_jit_binary_lock_ro(header);
+               } else {
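+                       /* Not the final pass yet: keep the image writable
+                        * and stash the JIT state for the extra pass.
+                        */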
+                       jit_data->addrs = addrs;
+                       jit_data->ctx = ctx;
+                       jit_data->proglen = proglen;
+                       jit_data->image = image;
+                       jit_data->header = header;
+               }
                prog->bpf_func = (void *)image;
                prog->jited = 1;
                prog->jited_len = proglen;
        } else {
                prog = orig_prog;
        }
 
+       if (!prog->is_func || extra_pass) {
 out_addrs:
-       kfree(addrs);
+               kfree(addrs);
+               kfree(jit_data);
+               prog->aux->jit_data = NULL;
+       }
 out:
        if (tmp_blinded)
                bpf_jit_prog_release_other(prog, prog == orig_prog ?
 
        struct bpf_insn *insn;
        int i, rewritten;
 
-       if (!bpf_jit_blinding_enabled(prog))
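+       /* The extra JIT pass re-enters bpf_int_jit_compile() with an
+        * already-blinded prog; don't blind the same program twice.
+        */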
+       if (!bpf_jit_blinding_enabled(prog) || prog->blinded)
                return prog;
 
        clone = bpf_prog_clone_create(prog, GFP_USER);
                i        += insn_delta;
        }
 
+       clone->blinded = 1;
        return clone;
 }
 #endif /* CONFIG_BPF_JIT */
 static void bpf_prog_free_deferred(struct work_struct *work)
 {
        struct bpf_prog_aux *aux;
+       int i;
 
        aux = container_of(work, struct bpf_prog_aux, work);
        if (bpf_prog_is_dev_bound(aux))
                bpf_prog_offload_destroy(aux->prog);
-       bpf_jit_free(aux->prog);
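+       /* A multi-function program keeps its JITed images in aux->func[].
+        * Free each function first; the main prog's bpf_func points into
+        * func[0]'s image, so release its struct without a JIT free.
+        */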
+       for (i = 0; i < aux->func_cnt; i++)
+               bpf_jit_free(aux->func[i]);
+       if (aux->func_cnt) {
+               kfree(aux->func);
+               bpf_prog_unlock_free(aux->prog);
+       } else {
+               bpf_jit_free(aux->prog);
+       }
 }
 
 /* Free internal BPF program */
 
        return 0;
 }
 
+static int jit_subprogs(struct bpf_verifier_env *env)
+{
+       struct bpf_prog *prog = env->prog, **func, *tmp;
+       int i, j, subprog_start, subprog_end = 0, len, subprog;
+       struct bpf_insn *insn = prog->insnsi;
+       void *old_bpf_func;
+       int err = -ENOMEM;
+
+       if (env->subprog_cnt == 0)
+               return 0;
+
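+       /* First pass: replace each pseudo call's imm with a placeholder
+        * and stash the callee in insn->off (+1, so that func[] below
+        * indexes the main prog at 0 and the subprogs from 1).
+        */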
+       for (i = 0; i < prog->len; i++, insn++) {
+               if (insn->code != (BPF_JMP | BPF_CALL) ||
+                   insn->src_reg != BPF_PSEUDO_CALL)
+                       continue;
+               subprog = find_subprog(env, i + insn->imm + 1);
+               if (subprog < 0) {
+                       WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+                                 i + insn->imm + 1);
+                       return -EFAULT;
+               }
+               /* temporarily remember subprog id inside insn instead of
+                * aux_data, since next loop will split up all insns into funcs
+                */
+               insn->off = subprog + 1;
+               /* remember the original imm in case JIT fails and we need
+                * to fall back to the interpreter
+                */
+               env->insn_aux_data[i].call_imm = insn->imm;
+               /* point imm to __bpf_call_base+1 from the JIT's point of view */
+               insn->imm = 1;
+       }
+
+       func = kcalloc(env->subprog_cnt + 1, sizeof(prog), GFP_KERNEL);
+       if (!func)
+               return -ENOMEM;
+
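+       /* Split the main prog into subprog_cnt + 1 standalone programs:
+        * func[0] is the main body, func[1..subprog_cnt] are the subprogs.
+        */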
+       for (i = 0; i <= env->subprog_cnt; i++) {
+               subprog_start = subprog_end;
+               if (env->subprog_cnt == i)
+                       subprog_end = prog->len;
+               else
+                       subprog_end = env->subprog_starts[i];
+
+               len = subprog_end - subprog_start;
+               func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+               if (!func[i])
+                       goto out_free;
+               memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+                      len * sizeof(struct bpf_insn));
+               func[i]->len = len;
+               func[i]->is_func = 1;
+               /* Use bpf_prog_F_tag to indicate functions in stack traces.
+                * Long term, debug info will be needed to populate the names.
+                */
+               func[i]->aux->name[0] = 'F';
+               func[i]->aux->stack_depth = env->subprog_stack_depth[i];
+               func[i]->jit_requested = 1;
+               func[i] = bpf_int_jit_compile(func[i]);
+               if (!func[i]->jited) {
+                       err = -ENOTSUPP;
+                       goto out_free;
+               }
+               cond_resched();
+       }
+       /* at this point all bpf functions were successfully JITed;
+        * now populate all bpf_calls with correct addresses and
+        * run the last pass of the JIT
+        */
+       for (i = 0; i <= env->subprog_cnt; i++) {
+               insn = func[i]->insnsi;
+               for (j = 0; j < func[i]->len; j++, insn++) {
+                       if (insn->code != (BPF_JMP | BPF_CALL) ||
+                           insn->src_reg != BPF_PSEUDO_CALL)
+                               continue;
+                       subprog = insn->off;
+                       insn->off = 0;
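+                       /* Rewrite imm as the callee's offset from
+                        * __bpf_call_base; the extra JIT pass turns this
+                        * into a direct call to func[subprog]'s image.
+                        */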
+                       insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+                               func[subprog]->bpf_func -
+                               __bpf_call_base;
+               }
+       }
+       for (i = 0; i <= env->subprog_cnt; i++) {
+               old_bpf_func = func[i]->bpf_func;
+               tmp = bpf_int_jit_compile(func[i]);
+               if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+                       verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+                       err = -EFAULT;
+                       goto out_free;
+               }
+               cond_resched();
+       }
+
+       /* finally lock the prog and JIT images for all functions and
+        * populate kallsyms
+        */
+       for (i = 0; i <= env->subprog_cnt; i++) {
+               bpf_prog_lock_ro(func[i]);
+               bpf_prog_kallsyms_add(func[i]);
+       }
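+       /* func[0] carries the main function's body; expose its image as
+        * the program's entry point.
+        */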
+       prog->jited = 1;
+       prog->bpf_func = func[0]->bpf_func;
+       prog->aux->func = func;
+       prog->aux->func_cnt = env->subprog_cnt + 1;
+       return 0;
+out_free:
+       for (i = 0; i <= env->subprog_cnt; i++)
+               if (func[i])
+                       bpf_jit_free(func[i]);
+       kfree(func);
+       /* clean up the main prog so it can be interpreted */
+       prog->jit_requested = 0;
+       for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+               if (insn->code != (BPF_JMP | BPF_CALL) ||
+                   insn->src_reg != BPF_PSEUDO_CALL)
+                       continue;
+               insn->off = 0;
+               insn->imm = env->insn_aux_data[i].call_imm;
+       }
+       return err;
+}
+
 static int fixup_call_args(struct bpf_verifier_env *env)
 {
        struct bpf_prog *prog = env->prog;
        struct bpf_insn *insn = prog->insnsi;
        int i, depth;
 
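+       /* Try to JIT all functions natively first; on success the calls
+        * have been resolved to real addresses and nothing is left to fix
+        * up. Otherwise fall back to patching calls for the interpreter.
+        */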
+       if (env->prog->jit_requested)
+               if (jit_subprogs(env) == 0)
+                       return 0;
+
        for (i = 0; i < prog->len; i++, insn++) {
                if (insn->code != (BPF_JMP | BPF_CALL) ||
                    insn->src_reg != BPF_PSEUDO_CALL)