/* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
 #define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
+#define X86_REG_R12 (MAX_BPF_JIT_REG + 3)
 
 /*
  * The following table maps BPF registers to x86-64 registers.
        [BPF_REG_AX] = 2, /* R10 temp register */
        [AUX_REG] = 3,    /* R11 temp register */
        [X86_REG_R9] = 1, /* R9 register, 6th function argument */
+       [X86_REG_R12] = 4, /* R12 callee saved */
 };
 
 static const int reg2pt_regs[] = {
                             BIT(BPF_REG_8) |
                             BIT(BPF_REG_9) |
                             BIT(X86_REG_R9) |
+                            BIT(X86_REG_R12) |
                             BIT(BPF_REG_AX));
 }
 
        return byte;
 }
 
+/*
+ * Fold REX extension bits into 'byte' for an operand addressed as
+ * base + index.
+ *
+ * Callers in this file pass a REX prefix (0x40 or 0x48) as 'byte', the
+ * base/pointer register as 'r1', the value register as 'r2' and the SIB
+ * index register as 'index'. Bit 0 (REX.B) is set for an extended 'r1',
+ * bit 1 (REX.X) for an extended 'index' and bit 2 (REX.R) for an extended
+ * 'r2', where "extended" means is_ereg() returns true for that register.
+ */
+static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
+{
+       u8 rex_b = is_ereg(r1) ? 1 : 0;
+       u8 rex_x = is_ereg(index) ? 2 : 0;
+       u8 rex_r = is_ereg(r2) ? 4 : 0;
+
+       return byte | rex_b | rex_x | rex_r;
+}
+
 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 static u8 add_1reg(u8 byte, u32 dst_reg)
 {
                pop_r12(&prog);
        } else {
                pop_callee_regs(&prog, callee_regs_used);
+               if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+                       pop_r12(&prog);
        }
 
        EMIT1(0x58);                              /* pop rax */
                pop_r12(&prog);
        } else {
                pop_callee_regs(&prog, callee_regs_used);
+               if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
+                       pop_r12(&prog);
        }
 
        EMIT1(0x58);                                  /* pop rax */
        *pprog = prog;
 }
 
+/*
+ * Emit the ModRM byte, SIB byte and displacement that follow an opcode
+ * using [ptr_reg + index_reg + off] addressing.
+ *
+ * ModRM base 0x44 is mod=01 (8-bit displacement) and 0x84 is mod=10 (32-bit
+ * displacement); both have rm=100b, meaning a SIB byte follows. 'val_reg' is
+ * merged into the ModRM byte and 'ptr_reg'/'index_reg' into the SIB byte via
+ * add_2reg(). Any prefix and the opcode itself are emitted by the caller
+ * before this suffix.
+ */
+static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
+{
+       u8 *prog = *pprog;
+       /* SIB byte built from the pointer register and the index register */
+       u8 sib = add_2reg(0, ptr_reg, index_reg);
+
+       if (!is_imm8(off)) {
+               /* displacement needs 32 bits */
+               EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), sib, off);
+       } else {
+               /* displacement fits in 8 bits */
+               EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), sib, off);
+       }
+       *pprog = prog;
+}
+
 /*
  * Emit a REX byte if it will be necessary to address these registers
  */
        *pprog = prog;
 }
 
+/* LDX: dst_reg = *(u8*)(src_reg + index_reg + off) */
+static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+       u8 *prog = *pprog;
+       /* REX prefix without REX.W; extension bits cover base, value and index */
+       u8 rex = add_3mod(0x40, src_reg, dst_reg, index_reg);
+
+       switch (size) {
+       case BPF_B:
+               /* movzx dst, byte ptr [src + index + off] */
+               EMIT3(rex, 0x0F, 0xB6);
+               break;
+       case BPF_H:
+               /* movzx dst, word ptr [src + index + off] */
+               EMIT3(rex, 0x0F, 0xB7);
+               break;
+       case BPF_W:
+               /* mov dst32, dword ptr [src + index + off] */
+               EMIT2(rex, 0x8B);
+               break;
+       case BPF_DW:
+               /* mov dst64, qword ptr [src + index + off]; 0x08 is REX.W */
+               EMIT2(rex | 0x08, 0x8B);
+               break;
+       }
+       /* ModRM + SIB + displacement complete the instruction */
+       emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+       *pprog = prog;
+}
+
+/* LDX with R12 as the index register: dst_reg = *(u8*)(src_reg + r12 + off) */
+static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+       const u32 index_reg = X86_REG_R12;
+
+       emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+}
+
 /* STX: *(u8*)(dst_reg + off) = src_reg */
 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
        *pprog = prog;
 }
 
+/* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
+static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+       u8 *prog = *pprog;
+       /* REX prefix without REX.W; extension bits cover base, value and index */
+       u8 rex = add_3mod(0x40, dst_reg, src_reg, index_reg);
+
+       switch (size) {
+       case BPF_B:
+               /* mov byte ptr [dst + index + off], src8 */
+               EMIT2(rex, 0x88);
+               break;
+       case BPF_H:
+               /* mov word ptr [dst + index + off], src16 (0x66 operand-size prefix) */
+               EMIT3(0x66, rex, 0x89);
+               break;
+       case BPF_W:
+               /* mov dword ptr [dst + index + off], src32 */
+               EMIT2(rex, 0x89);
+               break;
+       case BPF_DW:
+               /* mov qword ptr [dst + index + off], src64; 0x08 is REX.W */
+               EMIT2(rex | 0x08, 0x89);
+               break;
+       }
+       /* ModRM + SIB + displacement complete the instruction */
+       emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
+       *pprog = prog;
+}
+
+/* STX with R12 as the index register: *(u8*)(dst_reg + r12 + off) = src_reg */
+static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+       const u32 index_reg = X86_REG_R12;
+
+       emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+}
+
+/* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
+static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
+{
+       u8 *prog = *pprog;
+       /*
+        * REX prefix without REX.W. There is no value register for an
+        * immediate store, so 0 is passed in its place.
+        */
+       u8 rex = add_3mod(0x40, dst_reg, 0, index_reg);
+
+       switch (size) {
+       case BPF_B:
+               /* mov byte ptr [dst + index + off], imm8 */
+               EMIT2(rex, 0xC6);
+               break;
+       case BPF_H:
+               /* mov word ptr [dst + index + off], imm16 (0x66 operand-size prefix) */
+               EMIT3(0x66, rex, 0xC7);
+               break;
+       case BPF_W:
+               /* mov dword ptr [dst + index + off], imm32 */
+               EMIT2(rex, 0xC7);
+               break;
+       case BPF_DW:
+               /* mov qword ptr [dst + index + off], imm32; 0x08 is REX.W */
+               EMIT2(rex | 0x08, 0xC7);
+               break;
+       }
+       /* ModRM + SIB + displacement, then the immediate operand */
+       emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
+       /* immediate width in bytes comes from bpf_size_to_x86_bytes(size) */
+       EMIT(imm, bpf_size_to_x86_bytes(size));
+       *pprog = prog;
+}
+
+/* ST with R12 as the index register: *(u8*)(dst_reg + r12 + off) = imm32 */
+static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
+{
+       const u32 index_reg = X86_REG_R12;
+
+       emit_st_index(pprog, size, dst_reg, index_reg, off, imm);
+}
+
 static int emit_atomic(u8 **pprog, u8 atomic_op,
                       u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
        return 0;
 }
 
+#define DONT_CLEAR 1
+
 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 {
        u32 reg = x->fixup >> 8;
 
        /* jump over faulting load and clear dest register */
-       *(unsigned long *)((void *)regs + reg) = 0;
+       if (reg != DONT_CLEAR)
+               *(unsigned long *)((void *)regs + reg) = 0;
        regs->ip += x->fixup & 0xff;
        return true;
 }
        bool tail_call_seen = false;
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+       u64 arena_vm_start;
        int i, excnt = 0;
        int ilen, proglen = 0;
        u8 *prog = temp;
        int err;
 
+       arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
+
        detect_reg_usage(insn, insn_cnt, callee_regs_used,
                         &tail_call_seen);
 
                push_r12(&prog);
                push_callee_regs(&prog, all_callee_regs_used);
        } else {
+               if (arena_vm_start)
+                       push_r12(&prog);
                push_callee_regs(&prog, callee_regs_used);
        }
+       if (arena_vm_start)
+               emit_mov_imm64(&prog, X86_REG_R12,
+                              arena_vm_start >> 32, (u32) arena_vm_start);
 
        ilen = prog - temp;
        if (rw_image)
                        emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
                        break;
 
+               case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
+               case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
+               case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
+               case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
+                       start_of_ldx = prog;
+                       emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
+                       goto populate_extable;
+
+                       /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
+               case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
+               case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
+               case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
+               case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+               case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
+               case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
+               case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
+               case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
+                       start_of_ldx = prog;
+                       if (BPF_CLASS(insn->code) == BPF_LDX)
+                               emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+                       else
+                               emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+populate_extable:
+                       {
+                               struct exception_table_entry *ex;
+                               u8 *_insn = image + proglen + (start_of_ldx - temp);
+                               s64 delta;
+
+                               if (!bpf_prog->aux->extable)
+                                       break;
+
+                               if (excnt >= bpf_prog->aux->num_exentries) {
+                                       pr_err("mem32 extable bug\n");
+                                       return -EFAULT;
+                               }
+                               ex = &bpf_prog->aux->extable[excnt++];
+
+                               delta = _insn - (u8 *)&ex->insn;
+                               /* switch ex to rw buffer for writes */
+                               ex = (void *)rw_image + ((void *)ex - (void *)image);
+
+                               ex->insn = delta;
+
+                               ex->data = EX_TYPE_BPF;
+
+                               ex->fixup = (prog - start_of_ldx) |
+                                       ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+                       }
+                       break;
+
                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
                case BPF_LDX | BPF_PROBE_MEM | BPF_B:
                                pop_r12(&prog);
                        } else {
                                pop_callee_regs(&prog, callee_regs_used);
+                               if (arena_vm_start)
+                                       pop_r12(&prog);
                        }
                        EMIT1(0xC9);         /* leave */
                        emit_return(&prog, image + addrs[i - 1] + (prog - temp));