mm: vmpressure: don't count proactive reclaim in vmpressure
author		Yosry Ahmed <yosryahmed@google.com>
		Thu, 14 Jul 2022 06:49:18 +0000 (06:49 +0000)
committer	Liam R. Howlett <Liam.Howlett@oracle.com>
		Wed, 3 Aug 2022 18:17:12 +0000 (14:17 -0400)
vmpressure is used in cgroup v1 to notify userspace of reclaim efficiency
events, and is also used in both cgroup v1 and v2 as a memory pressure
signal for networking; see mem_cgroup_under_socket_pressure().
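
On cgroup v1, userspace receives these notifications by registering an
eventfd against memory.pressure_level through cgroup.event_control.  A
minimal listener sketch follows; the mount point /sys/fs/cgroup/memory and
the group name "mygroup" are assumptions for illustration, not part of this
patch.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/eventfd.h>
#include <unistd.h>

int main(void)
{
	/* Assumed layout: cgroup v1 memory controller at /sys/fs/cgroup/memory. */
	int efd = eventfd(0, 0);
	int pfd = open("/sys/fs/cgroup/memory/mygroup/memory.pressure_level", O_RDONLY);
	int cfd = open("/sys/fs/cgroup/memory/mygroup/cgroup.event_control", O_WRONLY);
	char reg[64];
	uint64_t count;

	if (efd < 0 || pfd < 0 || cfd < 0)
		return 1;

	/* Registration format: "<event_fd> <pressure_level_fd> <level>". */
	snprintf(reg, sizeof(reg), "%d %d medium", efd, pfd);
	if (write(cfd, reg, strlen(reg)) < 0)
		return 1;

	/* Blocks until the kernel posts a vmpressure event at this level. */
	if (read(efd, &count, sizeof(count)) == sizeof(count))
		printf("vmpressure event received\n");
	return 0;
}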

Proactive reclaim is intended to probe memcgs for cold memory without
affecting their performance.  Hence, reclaim caused by writing to
memory.reclaim should not trigger vmpressure.
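
Proactive reclaim is requested from userspace by writing a byte count to a
memcg's memory.reclaim file (cgroup v2).  A minimal sketch, assuming a v2
hierarchy at /sys/fs/cgroup and a hypothetical group "workload"; after this
patch, reclaim driven this way no longer raises vmpressure events:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path: cgroup v2 group "workload". */
	int fd = open("/sys/fs/cgroup/workload/memory.reclaim", O_WRONLY);
	const char *req = "67108864";	/* ask for ~64 MiB of reclaim */

	if (fd < 0)
		return 1;
	/* The write fails with EAGAIN if the kernel cannot reclaim the full amount. */
	if (write(fd, req, strlen(req)) < 0)
		return 1;
	close(fd);
	return 0;
}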

Link: https://lkml.kernel.org/r/20220714064918.2576464-1-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Hildenbrand <david@redhat.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: NeilBrown <neilb@suse.de>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/swap.h
mm/memcontrol.c
mm/vmscan.c

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6d11c51b2b627503522158cd6cadd0041ecddc10..ea895b40e6ff12d24bb286cfeba7b2d79ad7a219 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -411,10 +411,13 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask, nodemask_t *mask);
+
+#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
+#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                                  unsigned long nr_pages,
                                                  gfp_t gfp_mask,
-                                                 bool may_swap);
+                                                 unsigned int reclaim_options);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
                                                gfp_t gfp_mask, bool noswap,
                                                pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7e463660209aeed4cfb9df2f96f733120c615ec7..56b25521bf8b6df5cebd689e0204765acda33512 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2330,7 +2330,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 
                psi_memstall_enter(&pflags);
                nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
-                                                            gfp_mask, true);
+                                                       gfp_mask,
+                                                       MEMCG_RECLAIM_MAY_SWAP);
                psi_memstall_leave(&pflags);
        } while ((memcg = parent_mem_cgroup(memcg)) &&
                 !mem_cgroup_is_root(memcg));
@@ -2575,7 +2576,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
        struct page_counter *counter;
        unsigned long nr_reclaimed;
        bool passed_oom = false;
-       bool may_swap = true;
+       unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
        bool drained = false;
        bool raised_max_event = false;
        unsigned long pflags;
@@ -2593,7 +2594,7 @@ retry:
                mem_over_limit = mem_cgroup_from_counter(counter, memory);
        } else {
                mem_over_limit = mem_cgroup_from_counter(counter, memsw);
-               may_swap = false;
+               reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
        }
 
        if (batch > nr_pages) {
@@ -2621,7 +2622,7 @@ retry:
 
        psi_memstall_enter(&pflags);
        nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-                                                   gfp_mask, may_swap);
+                                                   gfp_mask, reclaim_options);
        psi_memstall_leave(&pflags);
 
        if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3439,8 +3440,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
                        continue;
                }
 
-               if (!try_to_free_mem_cgroup_pages(memcg, 1,
-                                       GFP_KERNEL, !memsw)) {
+               if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
+                                       memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
                        ret = -EBUSY;
                        break;
                }
@@ -3550,7 +3551,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
                if (signal_pending(current))
                        return -EINTR;
 
-               if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL, true))
+               if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
+                                                 MEMCG_RECLAIM_MAY_SWAP))
                        nr_retries--;
        }
 
@@ -6302,7 +6304,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
                }
 
                reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-                                                        GFP_KERNEL, true);
+                                       GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
 
                if (!reclaimed && !nr_retries--)
                        break;
@@ -6351,7 +6353,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 
                if (nr_reclaims) {
                        if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-                                                         GFP_KERNEL, true))
+                                       GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
                                nr_reclaims--;
                        continue;
                }
@@ -6480,6 +6482,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
        struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
        unsigned int nr_retries = MAX_RECLAIM_RETRIES;
        unsigned long nr_to_reclaim, nr_reclaimed = 0;
+       unsigned int reclaim_options;
        int err;
 
        buf = strstrip(buf);
@@ -6487,6 +6490,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
        if (err)
                return err;
 
+       reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
        while (nr_reclaimed < nr_to_reclaim) {
                unsigned long reclaimed;
 
@@ -6503,7 +6507,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 
                reclaimed = try_to_free_mem_cgroup_pages(memcg,
                                                nr_to_reclaim - nr_reclaimed,
-                                               GFP_KERNEL, true);
+                                               GFP_KERNEL, reclaim_options);
 
                if (!reclaimed && !nr_retries--)
                        return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fbb4108250ee4efab573f49f14bb636c6f902d0a..9e7d8db42918796e8f6c82fa516fe0de0d287d89 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -101,6 +101,9 @@ struct scan_control {
        /* Can pages be swapped as part of reclaim? */
        unsigned int may_swap:1;
 
+       /* Proactive reclaim invoked by userspace through memory.reclaim */
+       unsigned int proactive:1;
+
        /*
         * Cgroup memory below memory.low is protected as long as we
         * don't threaten to OOM. If any cgroup is reclaimed at
@@ -3180,9 +3183,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
                            sc->priority);
 
                /* Record the group's reclaim efficiency */
-               vmpressure(sc->gfp_mask, memcg, false,
-                          sc->nr_scanned - scanned,
-                          sc->nr_reclaimed - reclaimed);
+               if (!sc->proactive)
+                       vmpressure(sc->gfp_mask, memcg, false,
+                                  sc->nr_scanned - scanned,
+                                  sc->nr_reclaimed - reclaimed);
 
        } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
 }
@@ -3305,9 +3309,10 @@ again:
        }
 
        /* Record the subtree's reclaim efficiency */
-       vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
-                  sc->nr_scanned - nr_scanned,
-                  sc->nr_reclaimed - nr_reclaimed);
+       if (!sc->proactive)
+               vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
+                          sc->nr_scanned - nr_scanned,
+                          sc->nr_reclaimed - nr_reclaimed);
 
        if (sc->nr_reclaimed - nr_reclaimed)
                reclaimable = true;
@@ -3589,8 +3594,9 @@ retry:
                __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
 
        do {
-               vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
-                               sc->priority);
+               if (!sc->proactive)
+                       vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
+                                       sc->priority);
                sc->nr_scanned = 0;
                shrink_zones(zonelist, sc);
 
@@ -3880,7 +3886,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                           unsigned long nr_pages,
                                           gfp_t gfp_mask,
-                                          bool may_swap)
+                                          unsigned int reclaim_options)
 {
        unsigned long nr_reclaimed;
        unsigned int noreclaim_flag;
@@ -3893,7 +3899,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                .priority = DEF_PRIORITY,
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
-               .may_swap = may_swap,
+               .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
+               .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
        };
        /*
         * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
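
With the bool parameter replaced by a bitmask, in-kernel callers now express
reclaim intent by OR-ing MEMCG_RECLAIM_* flags.  An illustrative call site,
mirroring the memory_reclaim() change above (the wrapper function itself is
hypothetical):

/* Illustrative only: passing the new reclaim_options bitmask. */
static unsigned long example_proactive_reclaim(struct mem_cgroup *memcg,
					       unsigned long nr_pages)
{
	/*
	 * MAY_SWAP lets reclaim consider anonymous memory; PROACTIVE makes
	 * try_to_free_mem_cgroup_pages() skip vmpressure accounting.
	 */
	unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
				       MEMCG_RECLAIM_PROACTIVE;

	return try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL,
					    reclaim_options);
}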