__mark_reg_not_init(regs + regno);
 }
 
+#define DEF_NOT_SUBREG (0)
 static void init_reg_state(struct bpf_verifier_env *env,
                           struct bpf_func_state *state)
 {
                mark_reg_not_init(env, regs, i);
                regs[i].live = REG_LIVE_NONE;
                regs[i].parent = NULL;
+               regs[i].subreg_def = DEF_NOT_SUBREG;
        }
 
        /* frame pointer */
  */
 static int mark_reg_read(struct bpf_verifier_env *env,
                         const struct bpf_reg_state *state,
-                        struct bpf_reg_state *parent)
+                        struct bpf_reg_state *parent, u8 flag)
 {
        bool writes = parent == state->parent; /* Observe write marks */
        int cnt = 0;
                                parent->var_off.value, parent->off);
                        return -EFAULT;
                }
-               if (parent->live & REG_LIVE_READ)
+               /* The first condition is more likely to be true than the
+                * second, checked it first.
+                */
+               if ((parent->live & REG_LIVE_READ) == flag ||
+                   parent->live & REG_LIVE_READ64)
                        /* The parentage chain never changes and
                         * this parent was already marked as LIVE_READ.
                         * There is no need to keep walking the chain again and
                         * keep re-marking all parents as LIVE_READ.
                         * This case happens when the same register is read
                         * multiple times without writes into it in-between.
+                        * Also, if parent has the stronger REG_LIVE_READ64 set,
+                        * then no need to set the weak REG_LIVE_READ32.
                         */
                        break;
                /* ... then we depend on parent's value */
-               parent->live |= REG_LIVE_READ;
+               parent->live |= flag;
+               /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
+               if (flag == REG_LIVE_READ64)
+                       parent->live &= ~REG_LIVE_READ32;
                state = parent;
                parent = state->parent;
                writes = true;
        return 0;
 }
 
+/* This function is supposed to be used by the following 32-bit optimization
+ * code only. It returns TRUE if the source or destination register operates
+ * on 64-bit, otherwise return FALSE.
+ */
+static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
+                    u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
+{
+       u8 code, class, op;
+
+       code = insn->code;
+       class = BPF_CLASS(code);
+       op = BPF_OP(code);
+       if (class == BPF_JMP) {
+               /* BPF_EXIT for "main" will reach here. Return TRUE
+                * conservatively.
+                */
+               if (op == BPF_EXIT)
+                       return true;
+               if (op == BPF_CALL) {
+                       /* BPF to BPF call will reach here because of marking
+                        * caller saved clobber with DST_OP_NO_MARK for which we
+                        * don't care the register def because they are anyway
+                        * marked as NOT_INIT already.
+                        */
+                       if (insn->src_reg == BPF_PSEUDO_CALL)
+                               return false;
+                       /* Helper call will reach here because of arg type
+                        * check, conservatively return TRUE.
+                        */
+                       if (t == SRC_OP)
+                               return true;
+
+                       return false;
+               }
+       }
+
+       if (class == BPF_ALU64 || class == BPF_JMP ||
+           /* BPF_END always use BPF_ALU class. */
+           (class == BPF_ALU && op == BPF_END && insn->imm == 64))
+               return true;
+
+       if (class == BPF_ALU || class == BPF_JMP32)
+               return false;
+
+       if (class == BPF_LDX) {
+               if (t != SRC_OP)
+                       return BPF_SIZE(code) == BPF_DW;
+               /* LDX source must be ptr. */
+               return true;
+       }
+
+       if (class == BPF_STX) {
+               if (reg->type != SCALAR_VALUE)
+                       return true;
+               return BPF_SIZE(code) == BPF_DW;
+       }
+
+       if (class == BPF_LD) {
+               u8 mode = BPF_MODE(code);
+
+               /* LD_IMM64 */
+               if (mode == BPF_IMM)
+                       return true;
+
+               /* Both LD_IND and LD_ABS return 32-bit data. */
+               if (t != SRC_OP)
+                       return  false;
+
+               /* Implicit ctx ptr. */
+               if (regno == BPF_REG_6)
+                       return true;
+
+               /* Explicit source could be any width. */
+               return true;
+       }
+
+       if (class == BPF_ST)
+               /* The only source register for BPF_ST is a ptr. */
+               return true;
+
+       /* Conservatively return true at default. */
+       return true;
+}
+
+static void mark_insn_zext(struct bpf_verifier_env *env,
+                          struct bpf_reg_state *reg)
+{
+       s32 def_idx = reg->subreg_def;
+
+       if (def_idx == DEF_NOT_SUBREG)
+               return;
+
+       env->insn_aux_data[def_idx - 1].zext_dst = true;
+       /* The dst will be zero extended, so won't be sub-register anymore. */
+       reg->subreg_def = DEF_NOT_SUBREG;
+}
+
 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
                         enum reg_arg_type t)
 {
        struct bpf_verifier_state *vstate = env->cur_state;
        struct bpf_func_state *state = vstate->frame[vstate->curframe];
+       struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
        struct bpf_reg_state *reg, *regs = state->regs;
+       bool rw64;
 
        if (regno >= MAX_BPF_REG) {
                verbose(env, "R%d is invalid\n", regno);
        }
 
        reg = ®s[regno];
+       rw64 = is_reg64(env, insn, regno, reg, t);
        if (t == SRC_OP) {
                /* check whether register used as source operand can be read */
                if (reg->type == NOT_INIT) {
                if (regno == BPF_REG_FP)
                        return 0;
 
-               return mark_reg_read(env, reg, reg->parent);
+               if (rw64)
+                       mark_insn_zext(env, reg);
+
+               return mark_reg_read(env, reg, reg->parent,
+                                    rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
        } else {
                /* check whether register used as dest operand can be written to */
                if (regno == BPF_REG_FP) {
                        return -EACCES;
                }
                reg->live |= REG_LIVE_WRITTEN;
+               reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
                if (t == DST_OP)
                        mark_reg_unknown(env, regs, regno);
        }
                        state->regs[value_regno].live |= REG_LIVE_WRITTEN;
                }
                mark_reg_read(env, ®_state->stack[spi].spilled_ptr,
-                             reg_state->stack[spi].spilled_ptr.parent);
+                             reg_state->stack[spi].spilled_ptr.parent,
+                             REG_LIVE_READ64);
                return 0;
        } else {
                int zeros = 0;
                        return -EACCES;
                }
                mark_reg_read(env, ®_state->stack[spi].spilled_ptr,
-                             reg_state->stack[spi].spilled_ptr.parent);
+                             reg_state->stack[spi].spilled_ptr.parent,
+                             REG_LIVE_READ64);
                if (value_regno >= 0) {
                        if (zeros == size) {
                                /* any size read into register is zero extended,
                                                    value_regno);
                                if (reg_type_may_be_null(reg_type))
                                        regs[value_regno].id = ++env->id_gen;
+                               /* A load of ctx field could have different
+                                * actual load size with the one encoded in the
+                                * insn. When the dst is PTR, it is for sure not
+                                * a sub-register.
+                                */
+                               regs[value_regno].subreg_def = DEF_NOT_SUBREG;
                        }
                        regs[value_regno].type = reg_type;
                }
                 * the whole slot to be marked as 'read'
                 */
                mark_reg_read(env, &state->stack[spi].spilled_ptr,
-                             state->stack[spi].spilled_ptr.parent);
+                             state->stack[spi].spilled_ptr.parent,
+                             REG_LIVE_READ64);
        }
        return update_stack_depth(env, state, min_off);
 }
                check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
        }
 
+       /* helper call returns 64-bit value. */
+       regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
+
        /* update return register (already marked as written above) */
        if (fn->ret_type == RET_INTEGER) {
                /* sets type to SCALAR_VALUE */
                                 */
                                *dst_reg = *src_reg;
                                dst_reg->live |= REG_LIVE_WRITTEN;
+                               dst_reg->subreg_def = DEF_NOT_SUBREG;
                        } else {
                                /* R1 = (u32) R2 */
                                if (is_pointer_value(env, insn->src_reg)) {
                                } else if (src_reg->type == SCALAR_VALUE) {
                                        *dst_reg = *src_reg;
                                        dst_reg->live |= REG_LIVE_WRITTEN;
+                                       dst_reg->subreg_def = env->insn_idx + 1;
                                } else {
                                        mark_reg_unknown(env, regs,
                                                         insn->dst_reg);
         * Already marked as written above.
         */
        mark_reg_unknown(env, regs, BPF_REG_0);
+       /* ld_abs load up to 32-bit skb data. */
+       regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
        return 0;
 }
 
        return true;
 }
 
+/* Return 0 if no propagation happened. Return negative error code if error
+ * happened. Otherwise, return the propagated bit.
+ */
 static int propagate_liveness_reg(struct bpf_verifier_env *env,
                                  struct bpf_reg_state *reg,
                                  struct bpf_reg_state *parent_reg)
 {
+       u8 parent_flag = parent_reg->live & REG_LIVE_READ;
+       u8 flag = reg->live & REG_LIVE_READ;
        int err;
 
-       if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
+       /* When comes here, read flags of PARENT_REG or REG could be any of
+        * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
+        * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
+        */
+       if (parent_flag == REG_LIVE_READ64 ||
+           /* Or if there is no read flag from REG. */
+           !flag ||
+           /* Or if the read flag from REG is the same as PARENT_REG. */
+           parent_flag == flag)
                return 0;
 
-       err = mark_reg_read(env, reg, parent_reg);
+       err = mark_reg_read(env, reg, parent_reg, flag);
        if (err)
                return err;
 
-       return 0;
+       return flag;
 }
 
 /* A write screens off any subsequent reads; but write marks come from the
                for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
                        err = propagate_liveness_reg(env, &state_reg[i],
                                                     &parent_reg[i]);
-                       if (err)
+                       if (err < 0)
                                return err;
+                       if (err == REG_LIVE_READ64)
+                               mark_insn_zext(env, &parent_reg[i]);
                }
 
                /* Propagate stack slots. */
                        state_reg = &state->stack[i].spilled_ptr;
                        err = propagate_liveness_reg(env, state_reg,
                                                     parent_reg);
-                       if (err)
+                       if (err < 0)
                                return err;
                }
        }
-       return err;
+       return 0;
 }
 
 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)