If kswapd fails to reclaim pages from a node MAX_RECLAIM_RETRIES times
in a row, kswapd on that node gets disabled. That is, the system won't
wake up kswapd for that node until page reclamation is observed at
least once. That reclamation is mostly done by direct reclaim, which in
turn re-enables kswapd.
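For context, a simplified sketch of this mechanism in mm/vmscan.c
(pre-patch; the full conditions appear in the hunks below):

	/* balance_pgdat(): count consecutive no-progress runs */
	if (!sc.nr_reclaimed)
		pgdat->kswapd_failures++;

	/* wakeup_kswapd(): hopeless node, leave it to direct reclaim */
	if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
		return;

	/* shrink_node(): a successful direct reclaim run revives kswapd */
	if (reclaimable)
		pgdat->kswapd_failures = 0;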
However, on systems with CXL memory nodes, workloads with high anon
page usage can disable kswapd indefinitely without ever triggering
direct reclaim. This can be reproduced with the following steps:
numa node 0 (32GB memory, 48 CPUs)
numa node 2~5 (512GB CXL memory, 128GB each)
(numa node 1 is disabled)
swap space 8GB
1) Set /sys/kernel/mm/demotion_enabled to 0.
2) Set /proc/sys/kernel/numa_balancing to 0.
3) Run a process that allocates and randomly accesses 500GB of anon
   pages (a minimal sketch of such a process follows the steps below).
4) Let the process exit normally.
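A minimal userspace sketch for step 3 (hypothetical and illustrative
only; the size and access pattern match the scenario above):

	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/mman.h>

	#define TOTAL	(500UL << 30)	/* ~500GB of anon memory */
	#define PAGE_SZ	4096UL

	int main(void)
	{
		unsigned long pages = TOTAL / PAGE_SZ;
		char *buf = mmap(NULL, TOTAL, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (buf == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* random writes fault in and dirty anon pages */
		for (unsigned long i = 0; i < pages; i++)
			buf[((unsigned long)random() % pages) * PAGE_SZ] = 1;
		return 0;	/* step 4: exit normally */
	}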
During 3), free memory on node 0 falls below the low watermark, and
kswapd runs and depletes the swap space. Then kswapd fails
consecutively and gets disabled. Subsequent allocations are served from
CXL memory, so node 0 never sees enough memory pressure to trigger
direct reclaim.
After 4), kswapd on node 0 remains disabled, and tasks running on that
node are unable to swap. Turning on NUMA_BALANCING_MEMORY_TIERING and
demotion at this point won't work properly either, since kswapd is
disabled.
To mitigate this problem, reset kswapd_failures to 0 under the
following conditions:
a) the ZONE_BELOW_HIGH bit of a zone in a hopeless node that has a
   fallback memory node gets cleared.
b) demotion_enabled is changed from false to true.
Rationale for a):
The ZONE_BELOW_HIGH bit being cleared may be a sign that the node is
reclaimable again. This won't help much if the memory-hungry process
keeps running without freeing anything, but at least the node will
return to a reclaimable state once the process exits.
Rationale for b):
When demotion_enabled is false, kswapd can reclaim anon pages only by
swapping them out to swap space. Once demotion_enabled is turned on,
kswapd can also demote anon pages to another node and reclaim them
there, so the failure count gathered under the old policy is no longer
valid.
Since a reset of kswapd_failures could be lost by a concurrent
non-atomic ++ operation, the counter is converted from int to
atomic_t.
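With a plain int, the ++ in balance_pgdat() is a read-modify-write, so
a concurrent reset can be lost (hypothetical interleaving):

	kswapd					another CPU
	------					-----------
	tmp = pgdat->kswapd_failures;
						pgdat->kswapd_failures = 0;
	pgdat->kswapd_failures = tmp + 1;	/* reset lost */

atomic_inc()/atomic_set() keep any reset visible; at worst the counter
ends up off by the single in-flight increment.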
Link: https://lkml.kernel.org/r/aL6qGi69jWXfPc4D@pcw-MS-7D22
Signed-off-by: Chanwon Park <flyinrm@gmail.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Qi Zheng <zhengqi.arch@bytedance.com>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
int kswapd_order;
enum zone_type kswapd_highest_zoneidx;
- int kswapd_failures; /* Number of 'reclaimed == 0' runs */
+ atomic_t kswapd_failures; /* Number of 'reclaimed == 0' runs */
#ifdef CONFIG_COMPACTION
int kcompactd_max_order;
const char *buf, size_t count)
{
ssize_t ret;
+ bool before = numa_demotion_enabled;
ret = kstrtobool(buf, &numa_demotion_enabled);
if (ret)
return ret;
+ /*
+ * Reset kswapd_failures statistics. They may no longer be
+ * valid since the policy for kswapd has changed.
+ */
+ if (!before && numa_demotion_enabled) {
+ struct pglist_data *pgdat;
+
+ for_each_online_pgdat(pgdat)
+ atomic_set(&pgdat->kswapd_failures, 0);
+ }
+
return count;
}
pcp, pindex);
if (test_bit(ZONE_BELOW_HIGH, &zone->flags) &&
zone_watermark_ok(zone, 0, high_wmark_pages(zone),
- ZONE_MOVABLE, 0))
+ ZONE_MOVABLE, 0)) {
+ struct pglist_data *pgdat = zone->zone_pgdat;
clear_bit(ZONE_BELOW_HIGH, &zone->flags);
+
+ /*
+ * Assume that memory pressure on this node is gone and the
+ * node may be in a reclaimable state. If a memory fallback
+ * node exists, direct reclaim may not have been triggered,
+ * leaving the 'hopeless' node in that state for a while.
+ * Let kswapd work again by resetting kswapd_failures.
+ */
+ if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES &&
+     next_memory_node(pgdat->node_id) < MAX_NUMNODES)
+ atomic_set(&pgdat->kswapd_failures, 0);
+ }
}
}
#endif
K(node_page_state(pgdat, NR_PAGETABLE)),
K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)),
- str_yes_no(pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES),
+ str_yes_no(atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES),
K(node_page_state(pgdat, NR_BALLOON_PAGES)));
}
* If kswapd is disabled, reschedule if necessary but do not
* throttle as the system is likely near OOM.
*/
- if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
return true;
/*
blk_finish_plug(&plug);
done:
if (sc->nr_reclaimed > reclaimed)
- pgdat->kswapd_failures = 0;
+ atomic_set(&pgdat->kswapd_failures, 0);
}
/******************************************************************************
* successful direct reclaim run will revive a dormant kswapd.
*/
if (reclaimable)
- pgdat->kswapd_failures = 0;
+ atomic_set(&pgdat->kswapd_failures, 0);
else if (sc->cache_trim_mode)
sc->cache_trim_mode_failed = 1;
}
int i;
bool wmark_ok;
- if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
return true;
for_each_managed_zone_pgdat(zone, pgdat, i, ZONE_NORMAL) {
wake_up_all(&pgdat->pfmemalloc_wait);
/* Hopeless node, leave it to direct reclaim */
- if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES)
return true;
if (pgdat_balanced(pgdat, order, highest_zoneidx)) {
}
if (!sc.nr_reclaimed)
- pgdat->kswapd_failures++;
+ atomic_inc(&pgdat->kswapd_failures);
out:
clear_reclaim_active(pgdat, highest_zoneidx);
return;
/* Hopeless node, leave it to direct reclaim if possible */
- if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ||
+ if (atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES ||
(pgdat_balanced(pgdat, order, highest_zoneidx) &&
!pgdat_watermark_boosted(pgdat, highest_zoneidx))) {
/*
seq_printf(m,
"\n node_unreclaimable: %u"
"\n start_pfn: %lu",
- pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
+ atomic_read(&pgdat->kswapd_failures) >= MAX_RECLAIM_RETRIES,
zone->zone_start_pfn);
seq_putc(m, '\n');
}