* then branch directly to slow_path_XXX if required.  (In fact, could
  * load a spare GPR with the address of slow_path_generic and pass size
  * as an argument, making the call site a mtlr, li and blrl.)
- *
- * Technically, the "is addr < 0" check is unnecessary & slowing down
- * the ABS path, as it's statically checked on generation.
  */
        .globl  sk_load_word
 sk_load_word:
        cmpdi   r_addr, 0
-       blt     bpf_error
+       blt     bpf_slow_path_word_neg
+       .globl  sk_load_word_positive_offset
+sk_load_word_positive_offset:
        /* Are we accessing past headlen? */
        subi    r_scratch1, r_HL, 4
        cmpd    r_scratch1, r_addr
        .globl  sk_load_half
 sk_load_half:
        cmpdi   r_addr, 0
-       blt     bpf_error
+       blt     bpf_slow_path_half_neg
+       .globl  sk_load_half_positive_offset
+sk_load_half_positive_offset:
        subi    r_scratch1, r_HL, 2
        cmpd    r_scratch1, r_addr
        blt     bpf_slow_path_half
        .globl  sk_load_byte
 sk_load_byte:
        cmpdi   r_addr, 0
-       blt     bpf_error
+       blt     bpf_slow_path_byte_neg
+       .globl  sk_load_byte_positive_offset
+sk_load_byte_positive_offset:
        cmpd    r_HL, r_addr
        ble     bpf_slow_path_byte
        lbzx    r_A, r_D, r_addr
 
 /*
  * BPF_S_LDX_B_MSH: ldxb  4*([offset]&0xf)
- * r_addr is the offset value, already known positive
+ * r_addr is the offset value (may be negative)
  */
        .globl sk_load_byte_msh
 sk_load_byte_msh:
+       cmpdi   r_addr, 0
+       blt     bpf_slow_path_byte_msh_neg
+       .globl sk_load_byte_msh_positive_offset
+sk_load_byte_msh_positive_offset:
        cmpd    r_HL, r_addr
        ble     bpf_slow_path_byte_msh
        lbzx    r_X, r_D, r_addr
        rlwinm  r_X, r_X, 2, 32-4-2, 31-2
        blr
 
-bpf_error:
-       /* Entered with cr0 = lt */
-       li      r3, 0
-       /* Generated code will 'blt epilogue', returning 0. */
-       blr
-
 /* Call out to skb_copy_bits:
  * We'll need to back up our volatile regs first; we have
  * local variable space at r1+(BPF_PPC_STACK_BASIC).
        lbz     r_X, BPF_PPC_STACK_BASIC+(2*8)(r1)
        rlwinm  r_X, r_X, 2, 32-4-2, 31-2
        blr
+
+/* Call out to bpf_internal_load_pointer_neg_helper:
+ * We'll need to back up our volatile regs first; we have
+ * local variable space at r1+(BPF_PPC_STACK_BASIC).
+ * Allocate a new stack frame here so that stashing LR and calling
+ * out remains ABI-compliant.
+ */
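+/*
+ * For reference, the helper's C prototype (roughly, from
+ * net/core/filter.c) is:
+ *   void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
+ *                                              int k, unsigned int size);
+ * It resolves the SKF_NET_OFF/SKF_LL_OFF magic offsets and returns a
+ * pointer to the data, or NULL if the load cannot be satisfied.
+ */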
+#define sk_negative_common(SIZE)                               \
+       mflr    r0;                                             \
+       std     r0, 16(r1);                                     \
+       /* stash r_skb (R3) in the caller frame's parameter save area */ \
+       std     r_skb, (BPF_PPC_STACKFRAME+48)(r1);             \
+       std     r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);           \
+       std     r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);           \
+       stdu    r1, -BPF_PPC_SLOWPATH_FRAME(r1);                \
+       /* R3 = r_skb, as passed */                             \
+       mr      r4, r_addr;                                     \
+       li      r5, SIZE;                                       \
+       bl      bpf_internal_load_pointer_neg_helper;           \
+       /* R3 != 0 on success */                                \
+       addi    r1, r1, BPF_PPC_SLOWPATH_FRAME;                 \
+       ld      r0, 16(r1);                                     \
+       ld      r_A, (BPF_PPC_STACK_BASIC+(0*8))(r1);           \
+       ld      r_X, (BPF_PPC_STACK_BASIC+(1*8))(r1);           \
+       mtlr    r0;                                             \
+       cmpldi  r3, 0;                                          \
+       beq     bpf_error_slow; /* cr0 = EQ */                  \
+       mr      r_addr, r3;                                     \
+       ld      r_skb, (BPF_PPC_STACKFRAME+48)(r1);             \
+       /* Great success! */
+
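+/* Negative-offset slow paths.  The fast paths above branch here when
+ * r_addr turns out to be negative at run time; offsets more negative
+ * than SKF_LL_OFF (-0x200000) can never be valid and fail straight to
+ * bpf_error.  Generated code branches directly to the
+ * *_negative_offset entry points when K is already known to be
+ * negative at JIT time (see CHOOSE_LOAD_FUNC in bpf_jit_comp.c).
+ */
+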
+bpf_slow_path_word_neg:
+       lis     r_scratch1,-32  /* SKF_LL_OFF */
+       cmpd    r_addr, r_scratch1      /* addr < SKF_LL_OFF ? */
+       blt     bpf_error       /* cr0 = LT */
+       .globl  sk_load_word_negative_offset
+sk_load_word_negative_offset:
+       sk_negative_common(4)
+       lwz     r_A, 0(r_addr)
+       blr
+
+bpf_slow_path_half_neg:
+       lis     r_scratch1,-32  /* SKF_LL_OFF */
+       cmpd    r_addr, r_scratch1      /* addr < SKF_LL_OFF ? */
+       blt     bpf_error       /* cr0 = LT */
+       .globl  sk_load_half_negative_offset
+sk_load_half_negative_offset:
+       sk_negative_common(2)
+       lhz     r_A, 0(r_addr)
+       blr
+
+bpf_slow_path_byte_neg:
+       lis     r_scratch1,-32  /* SKF_LL_OFF */
+       cmpd    r_addr, r_scratch1      /* addr < SKF_LL_OFF ? */
+       blt     bpf_error       /* cr0 = LT */
+       .globl  sk_load_byte_negative_offset
+sk_load_byte_negative_offset:
+       sk_negative_common(1)
+       lbz     r_A, 0(r_addr)
+       blr
+
+bpf_slow_path_byte_msh_neg:
+       lis     r_scratch1,-32  /* SKF_LL_OFF */
+       cmpd    r_addr, r_scratch1      /* addr < SKF_LL_OFF ? */
+       blt     bpf_error       /* cr0 = LT */
+       .globl  sk_load_byte_msh_negative_offset
+sk_load_byte_msh_negative_offset:
+       sk_negative_common(1)
+       lbz     r_X, 0(r_addr)
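+       /* r_X = 4 * (r_X & 0xf) */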
+       rlwinm  r_X, r_X, 2, 32-4-2, 31-2
+       blr
+
+bpf_error_slow:
+       /* fabricate a cr0 = lt */
+       li      r_scratch1, -1
+       cmpdi   r_scratch1, 0
+bpf_error:
+       /* Entered with cr0 = lt */
+       li      r3, 0
+       /* Generated code will 'blt epilogue', returning 0. */
+       blr
 
        PPC_BLR();
 }
 
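+/* Pick the load helper entry point for a constant offset K: K >= 0 uses
+ * the positive-offset fast path, K in [SKF_LL_OFF, 0) goes straight to
+ * the negative-offset path, and anything more negative takes the
+ * generic entry, which rejects it at run time (filter returns 0).
+ */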
+#define CHOOSE_LOAD_FUNC(K, func) \
+       ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
+
 /* Assemble the body code between the prologue & epilogue. */
 static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
                              struct codegen_context *ctx,
 
                        /*** Absolute loads from packet header/data ***/
                case BPF_S_LD_W_ABS:
-                       func = sk_load_word;
+                       func = CHOOSE_LOAD_FUNC(K, sk_load_word);
                        goto common_load;
                case BPF_S_LD_H_ABS:
-                       func = sk_load_half;
+                       func = CHOOSE_LOAD_FUNC(K, sk_load_half);
                        goto common_load;
                case BPF_S_LD_B_ABS:
-                       func = sk_load_byte;
+                       func = CHOOSE_LOAD_FUNC(K, sk_load_byte);
                common_load:
-                       /*
-                        * Load from [K].  Reference with the (negative)
-                        * SKF_NET_OFF/SKF_LL_OFF offsets is unsupported.
-                        */
+                       /* Load from [K]; CHOOSE_LOAD_FUNC selected the entry for K's sign. */
                        ctx->seen |= SEEN_DATAREF;
-                       if ((int)K < 0)
-                               return -ENOTSUPP;
                        PPC_LI64(r_scratch1, func);
                        PPC_MTLR(r_scratch1);
                        PPC_LI32(r_addr, K);
                common_load_ind:
                        /*
                         * Load from [X + K].  Negative offsets are tested for
-                        * in the helper functions, and result in a 'ret 0'.
+                        * in the helper functions (X is only known at run time).
                         */
                        ctx->seen |= SEEN_DATAREF | SEEN_XREG;
                        PPC_LI64(r_scratch1, func);
                        break;
 
                case BPF_S_LDX_B_MSH:
-                       /*
-                        * x86 version drops packet (RET 0) when K<0, whereas
-                        * interpreter does allow K<0 (__load_pointer, special
-                        * ancillary data).  common_load returns ENOTSUPP if K<0,
-                        * so we fall back to interpreter & filter works.
-                        */
-                       func = sk_load_byte_msh;
+                       func = CHOOSE_LOAD_FUNC(K, sk_load_byte_msh);
                        goto common_load;
                        break;