mm: thp: reparent the split queue during memcg offline
author Qi Zheng <zhengqi.arch@bytedance.com>
Wed, 15 Oct 2025 06:35:33 +0000 (14:35 +0800)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 22 Oct 2025 01:51:39 +0000 (18:51 -0700)
Similar to list_lru, the split queue is relatively independent and does not
need to be reparented together with the objcg and LRU folios (which would
require holding the objcg lock and the lru lock).  So let's apply a
mechanism similar to list_lru's and reparent the split queue separately
when the memcg goes offline.

This is also a preparation for reparenting LRU folios.
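
In essence, the new hook splices the dying memcg's deferred split queue
onto its parent's queue under both queue locks and pokes the shrinker,
while the split queue lock helpers learn to walk up to the parent once the
memcg is marked dying.  A condensed sketch of the reparenting path (the
complete version, including the lock-helper retry, is in the
mm/huge_memory.c hunk below):

  void reparent_deferred_split_queue(struct mem_cgroup *memcg)
  {
          struct mem_cgroup *parent = parent_mem_cgroup(memcg);
          struct deferred_split *src = &memcg->deferred_split_queue;
          struct deferred_split *dst = &parent->deferred_split_queue;
          int nid;

          /* Irqs off: the queue locks are also taken with irqs disabled. */
          spin_lock_irq(&src->split_queue_lock);
          /* Same lock class as src, so tell lockdep the nesting is intended. */
          spin_lock_nested(&dst->split_queue_lock, SINGLE_DEPTH_NESTING);

          if (src->split_queue_len) {
                  /* Move every queued THP to the parent in one splice. */
                  list_splice_tail_init(&src->split_queue, &dst->split_queue);
                  dst->split_queue_len += src->split_queue_len;
                  src->split_queue_len = 0;

                  /* Ensure the deferred split shrinker rescans the parent. */
                  for_each_node(nid)
                          set_shrinker_bit(parent, nid,
                                           shrinker_id(deferred_split_shrinker));
          }

          spin_unlock(&dst->split_queue_lock);
          spin_unlock_irq(&src->split_queue_lock);
  }

Once this has run, any racing split_queue_lock()/split_queue_lock_irqsave()
caller sees the memcg as dying and retries against the parent, so newly
queued THPs land on the parent's queue and the existing entries stop being
hidden from the shrinker once the splice completes.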

Link: https://lkml.kernel.org/r/645f537dee489faa45e611d303bf482a06f0ece7.1760509767.git.zhengqi.arch@bytedance.com
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Muchun Song <muchun.song@linux.dev>
Acked-by: David Hildenbrand <david@redhat.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Lance Yang <lance.yang@linux.dev>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Mariano Pache <npache@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/huge_mm.h
include/linux/memcontrol.h
mm/huge_memory.c
mm/memcontrol.c

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 396d9e3d1d46971e79725c45541bd5fc294a514a..7698b3542c4f0e7053365e47c4bccce8dd87aa9b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -409,6 +409,9 @@ static inline int split_huge_page(struct page *page)
        return split_huge_page_to_list_to_order(page, NULL, 0);
 }
 void deferred_split_folio(struct folio *folio, bool partially_mapped);
+#ifdef CONFIG_MEMCG
+void reparent_deferred_split_queue(struct mem_cgroup *memcg);
+#endif
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long address, bool freeze);
@@ -613,6 +616,7 @@ static inline int try_folio_split_to_order(struct folio *folio,
 }
 
 static inline void deferred_split_folio(struct folio *folio, bool partially_mapped) {}
+static inline void reparent_deferred_split_queue(struct mem_cgroup *memcg) {}
 #define split_huge_pmd(__vma, __pmd, __address)        \
        do { } while (0)
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b2d4ec79adfe28e63c4eb61de37ccf7fb3e00b4..5ca97fece69079948d195ba69576592666b37417 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1801,6 +1801,11 @@ static inline void count_objcg_events(struct obj_cgroup *objcg,
 
 bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid);
 
+static inline bool memcg_is_dying(struct mem_cgroup *memcg)
+{
+       return memcg ? css_is_dying(&memcg->css) : false;
+}
+
 #else
 static inline bool mem_cgroup_kmem_disabled(void)
 {
@@ -1867,6 +1872,11 @@ static inline bool mem_cgroup_node_allowed(struct mem_cgroup *memcg, int nid)
 {
        return true;
 }
+
+static inline bool memcg_is_dying(struct mem_cgroup *memcg)
+{
+       return false;
+}
 #endif /* CONFIG_MEMCG */
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 920475659defe823cd080bc84f2a69f3ef3fdb55..370ecfd6a1821317c6f80220d4a05b82283f3125 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1117,8 +1117,19 @@ static struct deferred_split *split_queue_lock(int nid, struct mem_cgroup *memcg
 {
        struct deferred_split *queue;
 
+retry:
        queue = memcg_split_queue(nid, memcg);
        spin_lock(&queue->split_queue_lock);
+       /*
+        * There is a period between setting memcg to dying and reparenting
+        * deferred split queue, and during this period the THPs in the deferred
+        * split queue will be hidden from the shrinker side.
+        */
+       if (unlikely(memcg_is_dying(memcg))) {
+               spin_unlock(&queue->split_queue_lock);
+               memcg = parent_mem_cgroup(memcg);
+               goto retry;
+       }
 
        return queue;
 }
@@ -1128,8 +1139,14 @@ split_queue_lock_irqsave(int nid, struct mem_cgroup *memcg, unsigned long *flags
 {
        struct deferred_split *queue;
 
+retry:
        queue = memcg_split_queue(nid, memcg);
        spin_lock_irqsave(&queue->split_queue_lock, *flags);
+       if (unlikely(memcg_is_dying(memcg))) {
+               spin_unlock_irqrestore(&queue->split_queue_lock, *flags);
+               memcg = parent_mem_cgroup(memcg);
+               goto retry;
+       }
 
        return queue;
 }
@@ -4399,6 +4416,33 @@ next:
        return split;
 }
 
+#ifdef CONFIG_MEMCG
+void reparent_deferred_split_queue(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+       struct deferred_split *ds_queue = &memcg->deferred_split_queue;
+       struct deferred_split *parent_ds_queue = &parent->deferred_split_queue;
+       int nid;
+
+       spin_lock_irq(&ds_queue->split_queue_lock);
+       spin_lock_nested(&parent_ds_queue->split_queue_lock, SINGLE_DEPTH_NESTING);
+
+       if (!ds_queue->split_queue_len)
+               goto unlock;
+
+       list_splice_tail_init(&ds_queue->split_queue, &parent_ds_queue->split_queue);
+       parent_ds_queue->split_queue_len += ds_queue->split_queue_len;
+       ds_queue->split_queue_len = 0;
+
+       for_each_node(nid)
+               set_shrinker_bit(parent, nid, shrinker_id(deferred_split_shrinker));
+
+unlock:
+       spin_unlock(&parent_ds_queue->split_queue_lock);
+       spin_unlock_irq(&ds_queue->split_queue_lock);
+}
+#endif
+
 #ifdef CONFIG_DEBUG_FS
 static void split_huge_pages_all(void)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3ae5cbcaed752b57fb97e979cb022628873a5df1..75d1715628f7aec04edf4275c7a6cbb47e2215f9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3888,6 +3888,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
        zswap_memcg_offline_cleanup(memcg);
 
        memcg_offline_kmem(memcg);
+       reparent_deferred_split_queue(memcg);
        reparent_shrinker_deferred(memcg);
        wb_memcg_offline(memcg);
        lru_gen_offline_memcg(memcg);