*pprog = prog;
 }
 
+
+static void emit_load_skb_data_hlen(u8 **pprog)
+{
+       u8 *prog = *pprog;
+       int cnt = 0; /* not referenced directly here; presumably updated by the EMIT* macros */
+
+       /* Emit the three instructions that load the skb values used when
+        * seen_ld_abs is set:
+        * r9d = skb->len - skb->data_len (headlen)
+        * r10 = skb->data
+        *
+        * All three address memory relative to %rdi, so %rdi has to hold
+        * the skb pointer when the emitted code runs.
+        *
+        * Only the off32 forms are emitted, even for offsets that would
+        * fit an 8-bit displacement, presumably so the emitted length
+        * never varies: do_jit() adds a constant 22 to the call offset
+        * for its "pop, mov, sub, mov" sequence, which includes these
+        * three instructions (confirm the per-instruction size against
+        * the EMIT3_off32 definition).
+        */
+       /* mov %r9d, off32(%rdi) -- destination first: r9d = skb->len */
+       EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
+
+       /* sub %r9d, off32(%rdi) -- r9d -= skb->data_len */
+       EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
+
+       /* mov %r10, off32(%rdi) -- r10 = skb->data */
+       EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
+       *pprog = prog; /* store prog back for the caller (the EMIT* macros presumably advance it) */
+}
+
 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
 {
 
        emit_prologue(&prog);
 
-       if (seen_ld_abs) {
-               /* r9d : skb->len - skb->data_len (headlen)
-                * r10 : skb->data
-                */
-               if (is_imm8(offsetof(struct sk_buff, len)))
-                       /* mov %r9d, off8(%rdi) */
-                       EMIT4(0x44, 0x8b, 0x4f,
-                             offsetof(struct sk_buff, len));
-               else
-                       /* mov %r9d, off32(%rdi) */
-                       EMIT3_off32(0x44, 0x8b, 0x8f,
-                                   offsetof(struct sk_buff, len));
-
-               if (is_imm8(offsetof(struct sk_buff, data_len)))
-                       /* sub %r9d, off8(%rdi) */
-                       EMIT4(0x44, 0x2b, 0x4f,
-                             offsetof(struct sk_buff, data_len));
-               else
-                       EMIT3_off32(0x44, 0x2b, 0x8f,
-                                   offsetof(struct sk_buff, data_len));
-
-               if (is_imm8(offsetof(struct sk_buff, data)))
-                       /* mov %r10, off8(%rdi) */
-                       EMIT4(0x4c, 0x8b, 0x57,
-                             offsetof(struct sk_buff, data));
-               else
-                       /* mov %r10, off32(%rdi) */
-                       EMIT3_off32(0x4c, 0x8b, 0x97,
-                                   offsetof(struct sk_buff, data));
-       }
+       if (seen_ld_abs)
+               emit_load_skb_data_hlen(&prog);
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
                u8 b1 = 0, b2 = 0, b3 = 0;
                s64 jmp_offset;
                u8 jmp_cond;
+               bool reload_skb_data;
                int ilen;
                u8 *func;
 
                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);
                        if (seen_ld_abs) {
-                               EMIT2(0x41, 0x52); /* push %r10 */
-                               EMIT2(0x41, 0x51); /* push %r9 */
-                               /* need to adjust jmp offset, since
-                                * pop %r9, pop %r10 take 4 bytes after call insn
-                                */
-                               jmp_offset += 4;
+                               reload_skb_data = bpf_helper_changes_skb_data(func);
+                               if (reload_skb_data) {
+                                       EMIT1(0x57); /* push %rdi */
+                                       jmp_offset += 22; /* pop, mov, sub, mov */
+                               } else {
+                                       EMIT2(0x41, 0x52); /* push %r10 */
+                                       EMIT2(0x41, 0x51); /* push %r9 */
+                                       /* need to adjust jmp offset, since
+                                        * pop %r9, pop %r10 take 4 bytes after call insn
+                                        */
+                                       jmp_offset += 4;
+                               }
                        }
                        if (!imm32 || !is_simm32(jmp_offset)) {
                                pr_err("unsupported bpf func %d addr %p image %p\n",
                        }
                        EMIT1_off32(0xE8, jmp_offset);
                        if (seen_ld_abs) {
-                               EMIT2(0x41, 0x59); /* pop %r9 */
-                               EMIT2(0x41, 0x5A); /* pop %r10 */
+                               if (reload_skb_data) {
+                                       EMIT1(0x5F); /* pop %rdi */
+                                       emit_load_skb_data_hlen(&prog);
+                               } else {
+                                       EMIT2(0x41, 0x59); /* pop %r9 */
+                                       EMIT2(0x41, 0x5A); /* pop %r10 */
+                               }
                        }
                        break;
 
 
        .arg1_type      = ARG_PTR_TO_CTX,
 };
 
+/* BPF helper: push a VLAN tag onto the skb passed in r1.
+ *
+ * r2 carries the VLAN protocol as a __be16 value and vlan_tci the tag
+ * control information; r4 and r5 are not used. A protocol other than
+ * ETH_P_8021Q or ETH_P_8021AD is replaced with ETH_P_8021Q instead of
+ * being rejected. The return value is whatever skb_vlan_push() returns.
+ */
+static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+{
+       struct sk_buff *skb = (struct sk_buff *) (long) r1;
+       __be16 proto = (__force __be16) r2;
+       bool known_proto = proto == htons(ETH_P_8021Q) ||
+                          proto == htons(ETH_P_8021AD);
+
+       /* Unknown protocols fall back to 802.1Q. */
+       if (unlikely(!known_proto))
+               proto = htons(ETH_P_8021Q);
+
+       return skb_vlan_push(skb, proto, vlan_tci);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+       .func           = bpf_skb_vlan_push,
+       .gpl_only       = false, /* presumably: no GPL-only restriction */
+       .ret_type       = RET_INTEGER, /* helper returns skb_vlan_push()'s result */
+       .arg1_type      = ARG_PTR_TO_CTX, /* r1: the helper casts it to struct sk_buff * */
+       .arg2_type      = ARG_ANYTHING, /* r2: VLAN protocol; helper falls back to 802.1Q */
+       .arg3_type      = ARG_ANYTHING, /* vlan_tci: handed on to skb_vlan_push() */
+};
+
+/* BPF helper: pop a VLAN tag from the skb passed in r1.
+ *
+ * Only r1 is consumed; r2..r5 are ignored. The return value is
+ * whatever skb_vlan_pop() returns.
+ */
+static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+       return skb_vlan_pop((struct sk_buff *) (long) r1);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+       .func           = bpf_skb_vlan_pop,
+       .gpl_only       = false, /* presumably: no GPL-only restriction */
+       .ret_type       = RET_INTEGER, /* helper returns skb_vlan_pop()'s result */
+       .arg1_type      = ARG_PTR_TO_CTX, /* r1: the helper casts it to struct sk_buff * */
+};
+
+/* Tell the caller whether @func is bpf_skb_vlan_push() or
+ * bpf_skb_vlan_pop(), the two helpers treated here as changing skb
+ * data. Any other pointer yields false.
+ */
+bool bpf_helper_changes_skb_data(void *func)
+{
+       return func == bpf_skb_vlan_push || func == bpf_skb_vlan_pop;
+}
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
                return &bpf_clone_redirect_proto;
        case BPF_FUNC_get_cgroup_classid:
                return &bpf_get_cgroup_classid_proto;
+       case BPF_FUNC_skb_vlan_push:
+               return &bpf_skb_vlan_push_proto;
+       case BPF_FUNC_skb_vlan_pop:
+               return &bpf_skb_vlan_pop_proto;
        default:
                return sk_filter_func_proto(func_id);
        }