*/
        struct mem_cgroup *target_mem_cgroup;
 
+       /* Can active pages be deactivated as part of reclaim? */
+#define DEACTIVATE_ANON 1
+#define DEACTIVATE_FILE 2
+       unsigned int may_deactivate:2;
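+       /* Deactivate unconditionally, regardless of the ratio heuristics */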
+       unsigned int force_deactivate:1;
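+       /* An active list was skipped because deactivation wasn't allowed */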
+       unsigned int skipped_deactivate:1;
+
        /* Writepage batching in laptop mode; RECLAIM_WRITE */
        unsigned int may_writepage:1;
 
        /* One of the zones is ready for compaction */
        unsigned int compaction_ready:1;
 
+       /* There is easily reclaimable cold cache in the current node */
+       unsigned int cache_trim_mode:1;
+
        /* The file pages on the current node are dangerously low */
        unsigned int file_is_tiny:1;
 
        return nr_reclaimed;
 }
 
+static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
+                                struct lruvec *lruvec, struct scan_control *sc)
+{
+       if (is_active_lru(lru)) {
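+               /*
+                * is_file_lru() is 0 for anon and 1 for file, so this
+                * tests DEACTIVATE_ANON (1 << 0) for the anon LRU and
+                * DEACTIVATE_FILE (1 << 1) for the file LRU.
+                */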
+               if (sc->may_deactivate & (1 << is_file_lru(lru)))
+                       shrink_active_list(nr_to_scan, lruvec, sc, lru);
+               else
+                       sc->skipped_deactivate = 1;
+               return 0;
+       }
+
+       return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
+}
+
 /*
  * The inactive anon list should be small enough that the VM never has
  * to do too much work.
  *    1TB     101        10GB
  *   10TB     320        32GB
  */
-static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
-                                struct scan_control *sc, bool trace)
+static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru)
 {
-       enum lru_list active_lru = file * LRU_FILE + LRU_ACTIVE;
-       struct pglist_data *pgdat = lruvec_pgdat(lruvec);
-       enum lru_list inactive_lru = file * LRU_FILE;
+       enum lru_list active_lru = inactive_lru + LRU_ACTIVE;
        unsigned long inactive, active;
        unsigned long inactive_ratio;
-       struct lruvec *target_lruvec;
-       unsigned long refaults;
        unsigned long gb;
 
-       inactive = lruvec_lru_size(lruvec, inactive_lru, sc->reclaim_idx);
-       active = lruvec_lru_size(lruvec, active_lru, sc->reclaim_idx);
+       inactive = lruvec_page_state(lruvec, NR_LRU_BASE + inactive_lru);
+       active = lruvec_page_state(lruvec, NR_LRU_BASE + active_lru);
 
-       /*
-        * When refaults are being observed, it means a new workingset
-        * is being established. Disable active list protection to get
-        * rid of the stale workingset quickly.
-        */
-       target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
-       refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE);
-       if (file && target_lruvec->refaults != refaults) {
-               inactive_ratio = 0;
-       } else {
-               gb = (inactive + active) >> (30 - PAGE_SHIFT);
-               if (gb)
-                       inactive_ratio = int_sqrt(10 * gb);
-               else
-                       inactive_ratio = 1;
-       }
-
-       if (trace)
-               trace_mm_vmscan_inactive_list_is_low(pgdat->node_id, sc->reclaim_idx,
-                       lruvec_lru_size(lruvec, inactive_lru, MAX_NR_ZONES), inactive,
-                       lruvec_lru_size(lruvec, active_lru, MAX_NR_ZONES), active,
-                       inactive_ratio, file);
+       gb = (inactive + active) >> (30 - PAGE_SHIFT);
+       if (gb)
+               inactive_ratio = int_sqrt(10 * gb);
+       else
+               inactive_ratio = 1;
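+       /*
+        * E.g. with 4GB on these LRUs, int_sqrt(10 * 4) = 6, so the
+        * inactive list is considered low once it falls below roughly
+        * 1/7th of their combined size.
+        */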
 
        return inactive * inactive_ratio < active;
 }
 
-static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-                                struct lruvec *lruvec, struct scan_control *sc)
-{
-       if (is_active_lru(lru)) {
-               if (inactive_list_is_low(lruvec, is_file_lru(lru), sc, true))
-                       shrink_active_list(nr_to_scan, lruvec, sc, lru);
-               return 0;
-       }
-
-       return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
-}
-
 enum scan_balance {
        SCAN_EQUAL,
        SCAN_FRACT,
 
        /*
         * If the system is almost out of file pages, force-scan anon.
-        * But only if there are enough inactive anonymous pages on
-        * the LRU. Otherwise, the small LRU gets thrashed.
         */
-       if (sc->file_is_tiny &&
-           !inactive_list_is_low(lruvec, false, sc, false) &&
-           lruvec_lru_size(lruvec, LRU_INACTIVE_ANON,
-                           sc->reclaim_idx) >> sc->priority) {
+       if (sc->file_is_tiny) {
                scan_balance = SCAN_ANON;
                goto out;
        }
 
        /*
-        * If there is enough inactive page cache, i.e. if the size of the
-        * inactive list is greater than that of the active list *and* the
-        * inactive list actually has some pages to scan on this priority, we
-        * do not reclaim anything from the anonymous working set right now.
-        * Without the second condition we could end up never scanning an
-        * lruvec even if it has plenty of old anonymous pages unless the
-        * system is under heavy pressure.
+        * If there is enough inactive page cache, we do not reclaim
+        * anything from the anonymous working set right now.
         */
-       if (!inactive_list_is_low(lruvec, true, sc, false) &&
-           lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) >> sc->priority) {
+       if (sc->cache_trim_mode) {
                scan_balance = SCAN_FILE;
                goto out;
        }
         * Even if we did not try to evict anon pages at all, we want to
         * rebalance the anon lru active/inactive ratio.
         */
-       if (total_swap_pages && inactive_list_is_low(lruvec, false, sc, true))
+       if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON))
                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
                                   sc, LRU_ACTIVE_ANON);
 }
        unsigned long nr_reclaimed, nr_scanned;
        struct lruvec *target_lruvec;
        bool reclaimable = false;
+       unsigned long file;
 
        target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat);
 
        nr_reclaimed = sc->nr_reclaimed;
        nr_scanned = sc->nr_scanned;
 
+       /*
+        * Target desirable inactive:active list ratios for the anon
+        * and file LRU lists.
+        */
+       if (!sc->force_deactivate) {
+               unsigned long refaults;
+
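+               /* Anon deactivation is driven by the size ratio alone */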
+               if (inactive_is_low(target_lruvec, LRU_INACTIVE_ANON))
+                       sc->may_deactivate |= DEACTIVATE_ANON;
+               else
+                       sc->may_deactivate &= ~DEACTIVATE_ANON;
+
+               /*
+                * When refaults are being observed, it means a new
+                * workingset is being established. Deactivate to get
+                * rid of any stale active pages quickly.
+                */
+               refaults = lruvec_page_state(target_lruvec,
+                                            WORKINGSET_ACTIVATE);
+               if (refaults != target_lruvec->refaults ||
+                   inactive_is_low(target_lruvec, LRU_INACTIVE_FILE))
+                       sc->may_deactivate |= DEACTIVATE_FILE;
+               else
+                       sc->may_deactivate &= ~DEACTIVATE_FILE;
+       } else
+               sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE;
+
+       /*
+        * If we have plenty of inactive file pages that aren't
+        * thrashing, try to reclaim those first before touching
+        * anonymous pages.
+        */
+       file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE);
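+       /*
+        * file >> sc->priority is only non-zero when there are at
+        * least 2^priority inactive file pages, so a near-empty cache
+        * does not enable cache trimming.
+        */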
+       if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE))
+               sc->cache_trim_mode = 1;
+       else
+               sc->cache_trim_mode = 0;
+
        /*
         * Prevent the reclaimer from falling into the cache trap: as
         * cache pages start out inactive, every cache fault will tip
         * anon pages.  Try to detect this based on file LRU size.
         */
        if (!cgroup_reclaim(sc)) {
-               unsigned long file;
-               unsigned long free;
-               int z;
                unsigned long total_high_wmark = 0;
+               unsigned long free, anon;
+               int z;
 
                free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
                file = node_page_state(pgdat, NR_ACTIVE_FILE) +
                        total_high_wmark += high_wmark_pages(zone);
                }
 
-               sc->file_is_tiny = file + free <= total_high_wmark;
+               /*
+                * Consider anon: if that's low too, this isn't a
+                * runaway file reclaim problem, but rather just
+                * extreme pressure. Reclaim as per usual then.
+                */
+               anon = node_page_state(pgdat, NR_INACTIVE_ANON);
+
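+               /*
+                * file_is_tiny: file + free is below the watermarks,
+                * anon isn't already slated for deactivation, and there
+                * is enough inactive anon to scan at this priority.
+                */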
+               sc->file_is_tiny =
+                       file + free <= total_high_wmark &&
+                       !(sc->may_deactivate & DEACTIVATE_ANON) &&
+                       anon >> sc->priority;
        }
 
        shrink_node_memcgs(pgdat, sc);
        if (sc->compaction_ready)
                return 1;
 
+       /*
+        * We make inactive:active ratio decisions based on the node's
+        * composition of memory, but a restrictive reclaim_idx or a
+        * memory.low cgroup setting can exempt large amounts of
+        * memory from reclaim, neither of which is very common, so
+        * instead of doing costly eligibility calculations of the
+        * entire cgroup subtree up front, we assume the estimates are
+        * good, and retry with forcible deactivation if that fails.
+        */
+       if (sc->skipped_deactivate) {
+               sc->priority = initial_priority;
+               sc->force_deactivate = 1;
+               sc->skipped_deactivate = 0;
+               goto retry;
+       }
+
        /* Untapped cgroup reserves?  Don't OOM, retry. */
        if (sc->memcg_low_skipped) {
                sc->priority = initial_priority;
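+               /* Also start the deactivation heuristics from scratch */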
+               sc->force_deactivate = 0;
+               sc->skipped_deactivate = 0;
                sc->memcg_low_reclaim = 1;
                sc->memcg_low_skipped = 0;
                goto retry;
                                struct scan_control *sc)
 {
        struct mem_cgroup *memcg;
+       struct lruvec *lruvec;
 
        if (!total_swap_pages)
                return;
 
+       /*
+        * One ratio check on the node-level lruvec covers all memcgs,
+        * so do it once up front.
+        */
+       lruvec = mem_cgroup_lruvec(NULL, pgdat);
+       if (!inactive_is_low(lruvec, LRU_INACTIVE_ANON))
+               return;
+
        memcg = mem_cgroup_iter(NULL, NULL, NULL);
        do {
-               struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
-
-               if (inactive_list_is_low(lruvec, false, sc, true))
-                       shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-                                          sc, LRU_ACTIVE_ANON);
-
+               lruvec = mem_cgroup_lruvec(memcg, pgdat);
+               shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                  sc, LRU_ACTIVE_ANON);
                memcg = mem_cgroup_iter(NULL, memcg, NULL);
        } while (memcg);
 }