]> www.infradead.org Git - users/hch/misc.git/commitdiff
memcg: skip cgroup_file_notify if spinning is not allowed
author: Shakeel Butt <shakeel.butt@linux.dev>
Mon, 22 Sep 2025 22:02:03 +0000 (15:02 -0700)
committer: Andrew Morton <akpm@linux-foundation.org>
Tue, 7 Oct 2025 21:01:11 +0000 (14:01 -0700)
Generally, memcg charging is allowed from all contexts, including NMI,
where even spinning on a spinlock can cause locking issues.  However, one
call chain was missed during the addition of memcg charging from any
context support.  That is try_charge_memcg() -> memcg_memory_event() ->
cgroup_file_notify().

The possible function call tree under cgroup_file_notify() can acquire
many different spin locks in spinning mode.  Some of them are
cgroup_file_kn_lock, kernfs_notify_lock, pool_workqueue's lock.  So, let's
just skip cgroup_file_notify() from memcg charging if the context does not
allow spinning.

An alternative approach was also explored where, instead of skipping
cgroup_file_notify(), we defer the memcg event processing to irq_work [1].
However, it adds complexity, and it was decided to keep things simple until
we need more memcg events with the !allow_spinning requirement.

Link: https://lore.kernel.org/all/5qi2llyzf7gklncflo6gxoozljbm4h3tpnuv4u4ej4ztysvi6f@x44v7nz2wdzd/ [1]
Link: https://lkml.kernel.org/r/20250922220203.261714-1-shakeel.butt@linux.dev
Fixes: 3ac4638a734a ("memcg: make memcg_rstat_updated nmi safe")
Signed-off-by: Shakeel Butt <shakeel.butt@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Closes: https://lore.kernel.org/all/20250905061919.439648-1-yepeilin@google.com/
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peilin Ye <yepeilin@google.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/memcontrol.h
mm/memcontrol.c

index 16fe0306e50eaf6d1470a4b7cc095c8208842c44..873e510d6f8d9451566d2688941033385bef47a3 100644 (file)
@@ -1001,22 +1001,28 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
        count_memcg_events_mm(mm, idx, 1);
 }
 
-static inline void memcg_memory_event(struct mem_cgroup *memcg,
-                                     enum memcg_memory_event event)
+static inline void __memcg_memory_event(struct mem_cgroup *memcg,
+                                       enum memcg_memory_event event,
+                                       bool allow_spinning)
 {
        bool swap_event = event == MEMCG_SWAP_HIGH || event == MEMCG_SWAP_MAX ||
                          event == MEMCG_SWAP_FAIL;
 
+       /* For now only MEMCG_MAX can happen with !allow_spinning context. */
+       VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX);
+
        atomic_long_inc(&memcg->memory_events_local[event]);
-       if (!swap_event)
+       if (!swap_event && allow_spinning)
                cgroup_file_notify(&memcg->events_local_file);
 
        do {
                atomic_long_inc(&memcg->memory_events[event]);
-               if (swap_event)
-                       cgroup_file_notify(&memcg->swap_events_file);
-               else
-                       cgroup_file_notify(&memcg->events_file);
+               if (allow_spinning) {
+                       if (swap_event)
+                               cgroup_file_notify(&memcg->swap_events_file);
+                       else
+                               cgroup_file_notify(&memcg->events_file);
+               }
 
                if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                        break;
@@ -1026,6 +1032,12 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg,
                 !mem_cgroup_is_root(memcg));
 }
 
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+                                     enum memcg_memory_event event)
+{
+       __memcg_memory_event(memcg, event, true);
+}
+
 static inline void memcg_memory_event_mm(struct mm_struct *mm,
                                         enum memcg_memory_event event)
 {
index e090f29eb03bd1f62be1a90fd3549ee217ae40ed..4deda33625f41aa81db0276454f56b826a033146 100644 (file)
@@ -2307,12 +2307,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
        bool drained = false;
        bool raised_max_event = false;
        unsigned long pflags;
+       bool allow_spinning = gfpflags_allow_spinning(gfp_mask);
 
 retry:
        if (consume_stock(memcg, nr_pages))
                return 0;
 
-       if (!gfpflags_allow_spinning(gfp_mask))
+       if (!allow_spinning)
                /* Avoid the refill and flush of the older stock */
                batch = nr_pages;
 
@@ -2348,7 +2349,7 @@ retry:
        if (!gfpflags_allow_blocking(gfp_mask))
                goto nomem;
 
-       memcg_memory_event(mem_over_limit, MEMCG_MAX);
+       __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
        raised_max_event = true;
 
        psi_memstall_enter(&pflags);
@@ -2415,7 +2416,7 @@ force:
         * a MEMCG_MAX event.
         */
        if (!raised_max_event)
-               memcg_memory_event(mem_over_limit, MEMCG_MAX);
+               __memcg_memory_event(mem_over_limit, MEMCG_MAX, allow_spinning);
 
        /*
         * The allocation either can't fail or will lead to more memory