dst_state->parent = src->parent;
        dst_state->first_insn_idx = src->first_insn_idx;
        dst_state->last_insn_idx = src->last_insn_idx;
+       dst_state->dfs_depth = src->dfs_depth;
        for (i = 0; i <= src->curframe; i++) {
                dst = dst_state->frame[i];
                if (!dst) {
        return 0;
 }
 
+/* Look for a previous loop entry at insn_idx: nearest parent state
+ * stopped at insn_idx with callsites matching those in cur->frame.
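+ *
+ * The returned state must be on the current DFS path (st->branches != 0)
+ * and strictly above cur in the DFS tree (st->dfs_depth < cur->dfs_depth),
+ * otherwise cur and st do not form a loop.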
+ */
+static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
+                                                 struct bpf_verifier_state *cur,
+                                                 int insn_idx)
+{
+       struct bpf_verifier_state_list *sl;
+       struct bpf_verifier_state *st;
+
+       /* Explored states are pushed in stack order; most recent states come first */
+       sl = *explored_state(env, insn_idx);
+       for (; sl; sl = sl->next) {
+               /* If st->branches != 0, the state is a part of the current DFS
+                * verification path, hence cur and st form a loop.
+                */
+               st = &sl->state;
+               if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
+                   st->dfs_depth < cur->dfs_depth)
+                       return st;
+       }
+
+       return NULL;
+}
+
+static void reset_idmap_scratch(struct bpf_verifier_env *env);
+static bool regs_exact(const struct bpf_reg_state *rold,
+                      const struct bpf_reg_state *rcur,
+                      struct bpf_idmap *idmap);
+
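+/* Widen rcur by forgetting its range (marking it as an unknown scalar)
+ * when both rold and rcur are scalars that do not match exactly and
+ * neither carries a precision mark. Precise scalars must keep their
+ * range, otherwise a real safety violation could be masked.
+ */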
+static void maybe_widen_reg(struct bpf_verifier_env *env,
+                           struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
+                           struct bpf_idmap *idmap)
+{
+       if (rold->type != SCALAR_VALUE)
+               return;
+       if (rold->type != rcur->type)
+               return;
+       if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
+               return;
+       __mark_reg_unknown(env, rcur);
+}
+
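+/* Widen all not-precise scalars in 'cur' that differ from the matching
+ * registers or spilled scalar stack slots of 'old', where 'old' is the
+ * state saved at the previous entry to the same loop. Every frame of the
+ * two states is processed; see maybe_widen_reg() for the per-register
+ * rules.
+ */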
+static int widen_imprecise_scalars(struct bpf_verifier_env *env,
+                                  struct bpf_verifier_state *old,
+                                  struct bpf_verifier_state *cur)
+{
+       struct bpf_func_state *fold, *fcur;
+       int i, fr;
+
+       reset_idmap_scratch(env);
+       for (fr = old->curframe; fr >= 0; fr--) {
+               fold = old->frame[fr];
+               fcur = cur->frame[fr];
+
+               for (i = 0; i < MAX_BPF_REG; i++)
+                       maybe_widen_reg(env,
+                                       &fold->regs[i],
+                                       &fcur->regs[i],
+                                       &env->idmap_scratch);
+
+               for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
+                       if (!is_spilled_reg(&fold->stack[i]) ||
+                           !is_spilled_reg(&fcur->stack[i]))
+                               continue;
+
+                       maybe_widen_reg(env,
+                                       &fold->stack[i].spilled_ptr,
+                                       &fcur->stack[i].spilled_ptr,
+                                       &env->idmap_scratch);
+               }
+       }
+       return 0;
+}
+
 /* process_iter_next_call() is called when verifier gets to iterator's next
  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
  * to it as just "iter_next()" in comments below.
  * is some statically known limit on number of iterations (e.g., if there is
  * an explicit `if n > 100 then break;` statement somewhere in the loop).
  *
- * One very subtle but very important aspect is that we *always* simulate NULL
- * condition first (as the current state) before we simulate non-NULL case.
- * This has to do with intricacies of scalar precision tracking. By simulating
- * "exit condition" of iter_next() returning NULL first, we make sure all the
- * relevant precision marks *that will be set **after** we exit iterator loop*
- * are propagated backwards to common parent state of NULL and non-NULL
- * branches. Thanks to that, state equivalence checks done later in forked
- * state, when reaching iter_next() for ACTIVE iterator, can assume that
- * precision marks are finalized and won't change. Because simulating another
- * ACTIVE iterator iteration won't change them (because given same input
- * states we'll end up with exactly same output states which we are currently
- * comparing; and verification after the loop already propagated back what
- * needs to be **additionally** tracked as precise). It's subtle, grok
- * precision tracking for more intuitive understanding.
+ * Iteration convergence logic in is_state_visited() relies on exact
+ * states comparison, which ignores read and precision marks.
+ * This is necessary because read and precision marks are not finalized
+ * while in the loop. Exact comparison might preclude convergence for
+ * simple programs like the one below:
+ *
+ *     i = 0;
+ *     while(iter_next(&it))
+ *       i++;
+ *
+ * At each iteration step i++ would produce a new, distinct state, and
+ * eventually the instruction processing limit would be reached.
+ *
+ * To avoid such behavior, speculatively forget (widen) the range of
+ * imprecise scalar registers, if those registers were not precise at the
+ * end of the previous iteration and do not match exactly.
+ *
+ * This is a conservative heuristic that allows verifying a wide range of
+ * programs; however, it precludes verification of programs that conjure an
+ * imprecise value on the first loop iteration and use it as precise on the second.
+ * For example, the following safe program would fail to verify:
+ *
+ *     struct bpf_num_iter it;
+ *     int arr[10];
+ *     int i = 0, a = 0;
+ *     bpf_iter_num_new(&it, 0, 10);
+ *     while (bpf_iter_num_next(&it)) {
+ *       if (a == 0) {
+ *         a = 1;
+ *         i = 7; // Because i changed, the verifier would forget
+ *                // its range on the second loop entry.
+ *       } else {
+ *         arr[i] = 42; // This would fail to verify.
+ *       }
+ *     }
+ *     bpf_iter_num_destroy(&it);
  */
 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
                                  struct bpf_kfunc_call_arg_meta *meta)
 {
-       struct bpf_verifier_state *cur_st = env->cur_state, *queued_st;
+       struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
        struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
        struct bpf_reg_state *cur_iter, *queued_iter;
        int iter_frameno = meta->iter.frameno;
        }
 
        if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
+               /* Because the iter_next() call is a checkpoint, is_state_visited()
+                * should guarantee a parent state with the same call sites and insn_idx.
+                */
+               if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
+                   !same_callsites(cur_st->parent, cur_st)) {
+                       verbose(env, "bug: bad parent state for iter next call");
+                       return -EFAULT;
+               }
+               /* Note the use of cur_st->parent in the call below: it is
+                * necessary to skip the checkpoint created for cur_st by
+                * is_state_visited() right at this instruction.
+                */
+               prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
                /* branch out active iter state */
                queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
                if (!queued_st)
                queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
                queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
                queued_iter->iter.depth++;
+               if (prev_st)
+                       widen_imprecise_scalars(env, prev_st, queued_st);
 
                queued_fr = queued_st->frame[queued_st->curframe];
                mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
 
 /* Returns true if (rold safe implies rcur safe) */
 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
-                   struct bpf_reg_state *rcur, struct bpf_idmap *idmap)
+                   struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
 {
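+       /* Exact comparison is requested by the iterator convergence logic in
+        * is_state_visited(): read and precision marks are not finalized inside
+        * a loop, so registers must match exactly (modulo scalar ID mapping).
+        */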
+       if (exact)
+               return regs_exact(rold, rcur, idmap);
+
        if (!(rold->live & REG_LIVE_READ))
                /* explored state didn't use this */
                return true;
 }
 
 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
-                     struct bpf_func_state *cur, struct bpf_idmap *idmap)
+                     struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
 {
        int i, spi;
 
 
                spi = i / BPF_REG_SIZE;
 
-               if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
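+               /* In exact mode every stack slot byte must keep its type:
+                * a slot that changed e.g. from STACK_MISC to STACK_SPILL
+                * makes the states non-equivalent.
+                */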
+               if (exact &&
+                   old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
+                   cur->stack[spi].slot_type[i % BPF_REG_SIZE])
+                       return false;
+
+               if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
                        i += BPF_REG_SIZE - 1;
                        /* explored state didn't use this */
                        continue;
                         * return false to continue verification of this path
                         */
                        if (!regsafe(env, &old->stack[spi].spilled_ptr,
-                                    &cur->stack[spi].spilled_ptr, idmap))
+                                    &cur->stack[spi].spilled_ptr, idmap, exact))
                                return false;
                        break;
                case STACK_DYNPTR:
  * the current state will reach 'bpf_exit' instruction safely
  */
 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
-                             struct bpf_func_state *cur)
+                             struct bpf_func_state *cur, bool exact)
 {
        int i;
 
        for (i = 0; i < MAX_BPF_REG; i++)
                if (!regsafe(env, &old->regs[i], &cur->regs[i],
-                            &env->idmap_scratch))
+                            &env->idmap_scratch, exact))
                        return false;
 
-       if (!stacksafe(env, old, cur, &env->idmap_scratch))
+       if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
                return false;
 
        if (!refsafe(old, cur, &env->idmap_scratch))
        return true;
 }
 
+static void reset_idmap_scratch(struct bpf_verifier_env *env)
+{
+       env->idmap_scratch.tmp_id_gen = env->id_gen;
+       memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+}
+
 static bool states_equal(struct bpf_verifier_env *env,
                         struct bpf_verifier_state *old,
-                        struct bpf_verifier_state *cur)
+                        struct bpf_verifier_state *cur,
+                        bool exact)
 {
        int i;
 
        if (old->curframe != cur->curframe)
                return false;
 
-       env->idmap_scratch.tmp_id_gen = env->id_gen;
-       memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
+       reset_idmap_scratch(env);
 
        /* Verification state from speculative execution simulation
         * must never prune a non-speculative execution one.
        for (i = 0; i <= old->curframe; i++) {
                if (old->frame[i]->callsite != cur->frame[i]->callsite)
                        return false;
-               if (!func_states_equal(env, old->frame[i], cur->frame[i]))
+               if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
                        return false;
        }
        return true;
        struct bpf_verifier_state_list *new_sl;
        struct bpf_verifier_state_list *sl, **pprev;
        struct bpf_verifier_state *cur = env->cur_state, *new;
-       int i, j, err, states_cnt = 0;
+       int i, j, n, err, states_cnt = 0;
        bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
        bool add_new_state = force_new_state;
 
                         * It's safe to assume that iterator loop will finish, taking into
                         * account iter_next() contract of eventually returning
                         * sticky NULL result.
+                        *
+                        * Note that states have to be compared exactly in this case, because
+                        * read and precision marks might not be finalized inside the loop,
+                        * e.g. as in the program below:
+                        *
+                        *     1. r7 = -16
+                        *     2. r6 = bpf_get_prandom_u32()
+                        *     3. while (bpf_iter_num_next(&fp[-8])) {
+                        *     4.   if (r6 != 42) {
+                        *     5.     r7 = -32
+                        *     6.     r6 = bpf_get_prandom_u32()
+                        *     7.     continue
+                        *     8.   }
+                        *     9.   r0 = r10
+                        *    10.   r0 += r7
+                        *    11.   r8 = *(u64 *)(r0 + 0)
+                        *    12.   r6 = bpf_get_prandom_u32()
+                        *    13. }
+                        *
+                        * Here the verifier would first visit path 1-3, create a checkpoint at 3
+                        * with r7=-16, and continue via 4-7 back to 3. The existing checkpoint
+                        * at 3 does not have a read or precision mark for r7 yet, thus an
+                        * inexact states comparison would discard the current state with r7=-32
+                        * => the unsafe memory access at 11 would not be caught.
                         */
                        if (is_iter_next_insn(env, insn_idx)) {
-                               if (states_equal(env, &sl->state, cur)) {
+                               if (states_equal(env, &sl->state, cur, true)) {
                                        struct bpf_func_state *cur_frame;
                                        struct bpf_reg_state *iter_state, *iter_reg;
                                        int spi;
                        }
                        /* attempt to detect infinite loop to avoid unnecessary doomed work */
                        if (states_maybe_looping(&sl->state, cur) &&
-                           states_equal(env, &sl->state, cur) &&
+                           states_equal(env, &sl->state, cur, false) &&
                            !iter_active_depths_differ(&sl->state, cur)) {
                                verbose_linfo(env, insn_idx, "; ");
                                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
                                add_new_state = false;
                        goto miss;
                }
-               if (states_equal(env, &sl->state, cur)) {
+               if (states_equal(env, &sl->state, cur, false)) {
 hit:
                        sl->hit_cnt++;
                        /* reached equivalent register/stack state,
                 * to keep checking from state equivalence point of view.
                 * Higher numbers increase max_states_per_insn and verification time,
                 * but do not meaningfully decrease insn_processed.
+                * 'n' controls how many times a state can miss before eviction.
+                * Use a bigger 'n' for checkpoints, because evicting checkpoint
+                * states too early would hinder iterator convergence.
                 */
-               if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
+               n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
+               if (sl->miss_cnt > sl->hit_cnt * n + n) {
                        /* the state is unlikely to be useful. Remove it to
                         * speed up verification
                         */
 
        cur->parent = new;
        cur->first_insn_idx = insn_idx;
+       cur->dfs_depth = new->dfs_depth + 1;
        clear_jmp_history(cur);
        new_sl->next = *explored_state(env, insn_idx);
        *explored_state(env, insn_idx) = new_sl;