memcg: flush stats only if updated

author Shakeel Butt <shakeelb@google.com>

Fri, 5 Nov 2021 20:37:31 +0000 (13:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 6 Nov 2021 20:30:35 +0000 (13:30 -0700)
author Shakeel Butt <shakeelb@google.com>
Fri, 5 Nov 2021 20:37:31 +0000 (13:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Nov 2021 20:30:35 +0000 (13:30 -0700)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 15e6fb5d56a7e855ce5ece7d3153a497ae4eb560..ae12fdd4bc0de2f3e5f22a39540dfbc7293c85f7 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -103,11 +103,6 @@ static bool do_memsw_account(void)
         return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
  }
  
-/* memcg and lruvec stats flushing */
-static void flush_memcg_stats_dwork(struct work_struct *w);
-static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
-static DEFINE_SPINLOCK(stats_flush_lock);
-
  #define THRESHOLDS_EVENTS_TARGET 128
  #define SOFTLIMIT_EVENTS_TARGET 1024
  
@@ -635,6 +630,56 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
         return mz;
  }
  
+/*
+ * memcg and lruvec stats flushing
+ *
+ * Many codepaths leading to stats update or read are performance sensitive and
+ * adding stats flushing in such codepaths is not desirable. So, to optimize the
+ * flushing the kernel does:
+ *
+ * 1) Periodically and asynchronously flush the stats every 2 seconds to not let
+ *    rstat update tree grow unbounded.
+ *
+ * 2) Flush the stats synchronously on reader side only when there are more than
+ *    (MEMCG_CHARGE_BATCH * nr_cpus) update events. Though this optimization
+ *    will let stats be out of sync by atmost (MEMCG_CHARGE_BATCH * nr_cpus) but
+ *    only for 2 seconds due to (1).
+ */
+static void flush_memcg_stats_dwork(struct work_struct *w);
+static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
+static DEFINE_SPINLOCK(stats_flush_lock);
+static DEFINE_PER_CPU(unsigned int, stats_updates);
+static atomic_t stats_flush_threshold = ATOMIC_INIT(0);
+
+static inline void memcg_rstat_updated(struct mem_cgroup *memcg)
+{
+       cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
+       if (!(__this_cpu_inc_return(stats_updates) % MEMCG_CHARGE_BATCH))
+               atomic_inc(&stats_flush_threshold);
+}
+
+static void __mem_cgroup_flush_stats(void)
+{
+       if (!spin_trylock(&stats_flush_lock))
+               return;
+
+       cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
+       atomic_set(&stats_flush_threshold, 0);
+       spin_unlock(&stats_flush_lock);
+}
+
+void mem_cgroup_flush_stats(void)
+{
+       if (atomic_read(&stats_flush_threshold) > num_online_cpus())
+               __mem_cgroup_flush_stats();
+}
+
+static void flush_memcg_stats_dwork(struct work_struct *w)
+{
+       mem_cgroup_flush_stats();
+       queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+}
+
  /**
   * __mod_memcg_state - update cgroup memory statistics
   * @memcg: the memory cgroup
@@ -647,7 +692,7 @@ void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val)
                 return;
  
         __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
-       cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
+       memcg_rstat_updated(memcg);
  }
  
  /* idx can be of type enum memcg_stat_item or node_stat_item. */
@@ -675,10 +720,12 @@ void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
         memcg = pn->memcg;
  
         /* Update memcg */
-       __mod_memcg_state(memcg, idx, val);
+       __this_cpu_add(memcg->vmstats_percpu->state[idx], val);
  
         /* Update lruvec */
         __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+
+       memcg_rstat_updated(memcg);
  }
  
  /**
@@ -780,7 +827,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
                 return;
  
         __this_cpu_add(memcg->vmstats_percpu->events[idx], count);
-       cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
+       memcg_rstat_updated(memcg);
  }
  
  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
@@ -5341,21 +5388,6 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
         memcg_wb_domain_size_changed(memcg);
  }
  
-void mem_cgroup_flush_stats(void)
-{
-       if (!spin_trylock(&stats_flush_lock))
-               return;
-
-       cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
-       spin_unlock(&stats_flush_lock);
-}
-
-static void flush_memcg_stats_dwork(struct work_struct *w)
-{
-       mem_cgroup_flush_stats();
-       queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
-}
-
  static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
  {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
author	Shakeel Butt <shakeelb@google.com>
	Fri, 5 Nov 2021 20:37:31 +0000 (13:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 6 Nov 2021 20:30:35 +0000 (13:30 -0700)