 #define ANON_AND_FILE 2
 
 enum lruvec_flags {
-       LRUVEC_CONGESTED,               /* lruvec has many dirty pages
-                                        * backed by a congested BDI
-                                        */
+       /*
+        * An lruvec has many dirty pages backed by a congested BDI:
+        * 1. LRUVEC_CGROUP_CONGESTED is set by cgroup-level reclaim.
+        *    It can be cleared by cgroup reclaim or kswapd.
+        * 2. LRUVEC_NODE_CONGESTED is set by kswapd node-level reclaim.
+        *    It can only be cleared by kswapd.
+        *
+        * Essentially, kswapd can unthrottle an lruvec throttled by cgroup
+        * reclaim, but not vice versa. This only applies to the root cgroup.
+        * The goal is to prevent cgroup reclaim on the root cgroup (e.g.
+        * memory.reclaim) from unthrottling an unbalanced node (one that was
+        * throttled by kswapd).
+        */
+       LRUVEC_CGROUP_CONGESTED,
+       LRUVEC_NODE_CONGESTED,
 };
 
 #endif /* !__GENERATING_BOUNDS_H */
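
The comment above pins down an asymmetric protocol: cgroup reclaim sets and
clears only its own bit, while kswapd sets the node bit and may clear both.
A minimal userspace sketch of that protocol follows; the demo_* helpers and
the bare flags word are illustrative stand-ins, not kernel API (the kernel
operates on lruvec->flags with set_bit()/clear_bit()):

/* build: cc -Wall -o lruvec_flags_demo lruvec_flags_demo.c */
#include <stdio.h>

/* Same flag names as the patch; everything else is illustrative. */
enum lruvec_flags {
	LRUVEC_CGROUP_CONGESTED,
	LRUVEC_NODE_CONGESTED,
};

static unsigned long flags;	/* stands in for lruvec->flags */

/* cgroup-level reclaim touches only the cgroup bit */
static void demo_cgroup_set(void)   { flags |= 1UL << LRUVEC_CGROUP_CONGESTED; }
static void demo_cgroup_clear(void) { flags &= ~(1UL << LRUVEC_CGROUP_CONGESTED); }

/* kswapd sets the node bit and, once the node is balanced, clears both */
static void demo_kswapd_set(void)   { flags |= 1UL << LRUVEC_NODE_CONGESTED; }
static void demo_kswapd_clear(void)
{
	flags &= ~(1UL << LRUVEC_NODE_CONGESTED);
	flags &= ~(1UL << LRUVEC_CGROUP_CONGESTED);
}

int main(void)
{
	demo_cgroup_set();	/* cgroup reclaim finds the lruvec congested */
	demo_kswapd_set();	/* kswapd throttles the unbalanced node */
	demo_cgroup_clear();	/* root-level memory.reclaim finishes... */

	/* ...but the node stays throttled: only kswapd clears its bit */
	printf("node congested after cgroup clear: %lu\n",
	       (flags >> LRUVEC_NODE_CONGESTED) & 1);

	demo_kswapd_clear();	/* node balanced: kswapd clears both bits */
	printf("node congested after kswapd clear: %lu\n",
	       (flags >> LRUVEC_NODE_CONGESTED) & 1);
	return 0;
}

Run, this prints 1 then 0: the cgroup-side clear cannot unthrottle the node.
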
 
         * Legacy memcg will stall in page writeback so avoid forcibly
         * stalling in reclaim_throttle().
         */
-       if ((current_is_kswapd() ||
-            (cgroup_reclaim(sc) && writeback_throttling_sane(sc))) &&
-           sc->nr.dirty && sc->nr.dirty == sc->nr.congested)
-               set_bit(LRUVEC_CONGESTED, &target_lruvec->flags);
+       if (sc->nr.dirty && sc->nr.dirty == sc->nr.congested) {
+               if (cgroup_reclaim(sc) && writeback_throttling_sane(sc))
+                       set_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags);
+
+               if (current_is_kswapd())
+                       set_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags);
+       }
 
        /*
         * Stall direct reclaim for IO completions if the lruvec is
         * congested. Allow kswapd to continue until it starts encountering
         * unqueued dirty pages or cycling through the LRU too quickly.
         */
        if (!current_is_kswapd() && current_may_throttle() &&
            !sc->hibernation_mode &&
-           test_bit(LRUVEC_CONGESTED, &target_lruvec->flags))
+           (test_bit(LRUVEC_CGROUP_CONGESTED, &target_lruvec->flags) ||
+            test_bit(LRUVEC_NODE_CONGESTED, &target_lruvec->flags)))
                reclaim_throttle(pgdat, VMSCAN_THROTTLE_CONGESTED);
 
        if (should_continue_reclaim(pgdat, nr_node_reclaimed, sc))
 
                        lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup,
                                                   zone->zone_pgdat);
-                       clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+                       clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
                }
        }
 
 {
        struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
 
-       clear_bit(LRUVEC_CONGESTED, &lruvec->flags);
+       clear_bit(LRUVEC_NODE_CONGESTED, &lruvec->flags);
+       clear_bit(LRUVEC_CGROUP_CONGESTED, &lruvec->flags);
        clear_bit(PGDAT_DIRTY, &pgdat->flags);
        clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
 }
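
On the read side, direct reclaim now stalls when either bit is set, as the
test_bit() pair above shows. A hypothetical standalone predicate spelling out
that condition (lruvec_is_congested() is an invented name; the patch
open-codes the two test_bit() calls):

#include <stdbool.h>
#include <stdio.h>

enum lruvec_flags {
	LRUVEC_CGROUP_CONGESTED,
	LRUVEC_NODE_CONGESTED,
};

/* Stall on either source of congestion, no matter who raised the bit. */
static bool lruvec_is_congested(unsigned long flags)
{
	return flags & ((1UL << LRUVEC_CGROUP_CONGESTED) |
			(1UL << LRUVEC_NODE_CONGESTED));
}

int main(void)
{
	printf("%d\n", lruvec_is_congested(1UL << LRUVEC_NODE_CONGESTED));	/* 1 */
	printf("%d\n", lruvec_is_congested(1UL << LRUVEC_CGROUP_CONGESTED));	/* 1 */
	printf("%d\n", lruvec_is_congested(0));					/* 0 */
	return 0;
}

Keeping two bits instead of one leaves each clear path cheap while encoding
who may unthrottle whom; the stall path pays only one extra bit test.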