]> www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
memcg: skip cgroup_file_notify if spinning is not allowed
author Shakeel Butt <shakeel.butt@linux.dev>
Mon, 22 Sep 2025 22:02:03 +0000 (15:02 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Wed, 1 Oct 2025 22:58:25 +0000 (15:58 -0700)
Generally, memcg charging is allowed from all contexts, including NMI,
where even spinning on a spinlock can cause locking issues.  However, one
call chain was missed when support for memcg charging from any context
was added.  That is try_charge_memcg() -> memcg_memory_event() ->
cgroup_file_notify().

The possible function call tree under cgroup_file_notify() can acquire
many different spin locks in spinning mode.  Some of them are
cgroup_file_kn_lock, kernfs_notify_lock and pool_workqueue's lock.  So,
let's just skip cgroup_file_notify() from memcg charging if the context
does not allow spinning.

An alternative approach was also explored where, instead of skipping
cgroup_file_notify(), we defer the memcg event processing to irq_work [1].
However, it adds complexity and it was decided to keep things simple until
we need more memcg events with the !allow_spinning requirement.

Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1]
Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev
Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe")
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peilin Ye <yepeilin@google.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c

index 785173aa0739cc5579da30e61b108d8bc9903f0d..99850fec7f8b9b62bf26f480feada0078b9cfeb6 100644 (file)
@@ -987,22 +987,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
        count_memcg_events_mm(mm, idx, 1);
 }
 
-static inline void memcg_memory_event(struct mem_cgroup *memcg,
-                                     enum memcg_memory_event event)
+static inline void __memcg_memory_event(struct mem_cgroup *memcg,
+                                       enum memcg_memory_event event,
+                                       bool allow_spinning)
 {
        bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX ||
                          event == MEMCG_SWAP_FAIL;
 
+       /* For now only MEMCG_MAX can happen with !allow_spinning context. */
+       VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX);
+
        atomic_long_inc(&memcg->memory_events_local[event]);
-       if (!swap_event)
+       if (!swap_event && allow_spinning)
                cgroup_file_notify(&memcg->events_local_file);
 
        do {
                atomic_long_inc(&memcg->memory_events[event]);
-               if (swap_event)
-                       cgroup_file_notify(&memcg->swap_events_file);
-               else
-                       cgroup_file_notify(&memcg->events_file);
+               if (allow_spinning) {
+                       if (swap_event)
+                               cgroup_file_notify(&memcg->swap_events_file);
+                       else
+                               cgroup_file_notify(&memcg->events_file);
+               }
 
                if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                        break;
@@ -1012,6 +1018,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
                 !mem_cgroup_is_root(memcg));
 }
 
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+                                     enum memcg_memory_event event)
+{
+       __memcg_memory_event(memcg, event, true);
+}
+
 static inline void memcg_memory_event_mm(struct mm_struct *mm,
                                         enum memcg_memory_event event)
 {
index 8dd7fbed5a94271f90c45871e009954018ab0b3a..7d2e331656c6be0e11e4c4bafa15fe824415e1fc 100644 (file)
@@ -2309,12 +2309,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
        bool drained = false;
        bool raised_max_event = false;
        unsigned long pflags;
+       bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
 
 retry:
        if (consume_stock(memcg, nr_pages))
                return 0;
 
-       if (!gfpflags_allow_spinning(gfp_mask))
+       if (!allow_spinning)
                /* Avoid the refill and flush of the older stock */
                batch = nr_pages;
 
@@ -2350,7 +2351,7 @@ retry:
        if (!gfpflags_allow_blocking(gfp_mask))
                goto nomem;
 
-       memcg_memory_event(mem_over_limit, MEMCG_MAX);
+       __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
        raised_max_event = true;
 
        psi_memstall_enter(&pflags);
@@ -2417,7 +2418,7 @@ force:
         * a MEMCG_MAX event.
         */
        if (!raised_max_event)
-               memcg_memory_event(mem_over_limit, MEMCG_MAX);
+               __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
 
        /*
         * The allocation either can't fail or will lead to more memory