cgroup: change rstat function signatures from cgroup-based to css-based
author JP Kobryn <inwardvessel@gmail.com>
Fri, 4 Apr 2025 01:10:48 +0000 (18:10 -0700)
committer Tejun Heo <tj@kernel.org>
Fri, 4 Apr 2025 20:06:25 +0000 (10:06 -1000)
This non-functional change serves as preparation for moving to
subsystem-based rstat trees. To simplify future commits, change the
signatures of existing cgroup-based rstat functions to become css-based and
rename them to reflect that.

Though the signatures have changed, the implementations have not. Within
these functions, use the css->cgroup pointer to obtain the associated
cgroup, allowing the code to function the same as it did before this
patch. At applicable call sites, pass the subsystem-specific css pointer
as an argument, or pass a pointer to cgroup::self if not in subsystem
context.
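
As a rough illustration of the pattern, here is a minimal standalone
sketch; the stand-in types are simplified and the _old/_new names are
hypothetical, not the actual kernel code:

	/* Simplified stand-ins for the kernel structures involved. */
	struct cgroup;

	struct cgroup_subsys_state {
		struct cgroup *cgroup;	/* back-pointer to the owning cgroup */
	};

	struct cgroup {
		struct cgroup_subsys_state self;	/* css of the cgroup itself */
	};

	/* Before: the function took the cgroup directly. */
	void cgroup_rstat_flush_old(struct cgroup *cgrp);

	/* After: the body is unchanged except that the cgroup is first
	 * derived from the css argument. */
	void css_rstat_flush_new(struct cgroup_subsys_state *css)
	{
		struct cgroup *cgrp = css->cgroup;

		(void)cgrp;	/* ... unchanged implementation operating on cgrp ... */
	}

Call sites in subsystem context pass the subsystem css (e.g. &blkcg->css
in blk-cgroup.c below), while generic cgroup code passes &cgrp->self.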

Note that cgroup_rstat_updated_list() and cgroup_rstat_push_children()
are not altered yet, since converting them would require a larger number
of css-to-cgroup conversions that could overcomplicate the code at this
intermediate phase.
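
Concretely, those two helpers keep operating on struct cgroup and bridge
to the already-converted css-based helpers through cgroup::self, roughly
like this (a simplified sketch reusing the stand-in types above; the
_sketch names and reduced lock-helper signatures are hypothetical):

	/* Stubs standing in for the css-based per-cpu lock helpers. */
	unsigned long _css_rstat_cpu_lock_sketch(struct cgroup_subsys_state *css,
						 int cpu);
	void _css_rstat_cpu_unlock_sketch(struct cgroup_subsys_state *css,
					  int cpu, unsigned long flags);

	/* The list walker still takes a struct cgroup, so it passes the
	 * cgroup's own css when taking and releasing the per-cpu lock. */
	struct cgroup *cgroup_rstat_updated_list_sketch(struct cgroup *root,
							int cpu)
	{
		struct cgroup *head = NULL;
		unsigned long flags;

		flags = _css_rstat_cpu_lock_sketch(&root->self, cpu);
		/* ... build the per-cpu updated list in terms of struct cgroup ... */
		_css_rstat_cpu_unlock_sketch(&root->self, cpu, flags);
		return head;
	}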

Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
block/blk-cgroup.c
include/linux/cgroup-defs.h
include/linux/cgroup.h
kernel/cgroup/cgroup-internal.h
kernel/cgroup/cgroup.c
kernel/cgroup/rstat.c
mm/memcontrol.c
tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5905f277057bc59dcaeaf7a4c050b3cbfd61c122..0560ea4028564e83c91f0c053e9649bc051023e9 100644
@@ -1144,7 +1144,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 /*
  * We source root cgroup stats from the system-wide stats to avoid
  * tracking the same information twice and incurring overhead when no
- * cgroups are defined. For that reason, cgroup_rstat_flush in
+ * cgroups are defined. For that reason, css_rstat_flush in
  * blkcg_print_stat does not actually fill out the iostat in the root
  * cgroup's blkcg_gq.
  *
@@ -1253,7 +1253,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
        if (!seq_css(sf)->parent)
                blkcg_fill_root_iostats();
        else
-               cgroup_rstat_flush(blkcg->css.cgroup);
+               css_rstat_flush(&blkcg->css);
 
        rcu_read_lock();
        hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -2243,7 +2243,7 @@ void blk_cgroup_bio_start(struct bio *bio)
        }
 
        u64_stats_update_end_irqrestore(&bis->sync, flags);
-       cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+       css_rstat_updated(&blkcg->css, cpu);
        put_cpu();
 }
 
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 1bf2e8db6dacf48f1864b3a46925972c98bae27f..e58bfb8801113133b8d725594513eb2ecb8d58a4 100644
@@ -536,7 +536,7 @@ struct cgroup {
        /*
         * A singly-linked list of cgroup structures to be rstat flushed.
         * This is a scratch field to be used exclusively by
-        * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+        * css_rstat_flush_locked() and protected by cgroup_rstat_lock.
         */
        struct cgroup   *rstat_flush_next;
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 65d95bb2199fdc3866b13fa3e156481aeb428229..1f5b0a4a33566d16952a90cd7c0739927e1a2ce1 100644
@@ -693,8 +693,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
 /*
  * cgroup scalable recursive statistics.
  */
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
-void cgroup_rstat_flush(struct cgroup *cgrp);
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
 
 /*
  * Basic resource stats.
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index 95ab39e1ec8f0b5d675435ff170a2a0b2080bbd3..c161d34be63480169261d10081a470986d5e3cb1 100644
@@ -270,8 +270,8 @@ int cgroup_task_count(const struct cgroup *cgrp);
 /*
  * rstat.c
  */
-int cgroup_rstat_init(struct cgroup *cgrp);
-void cgroup_rstat_exit(struct cgroup *cgrp);
+int css_rstat_init(struct cgroup_subsys_state *css);
+void css_rstat_exit(struct cgroup_subsys_state *css);
 void cgroup_rstat_boot(void);
 void cgroup_base_stat_cputime_show(struct seq_file *seq);
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index d98994b106ed578173e5ea5e712551192dca042f..c284df1efc9fdd9108465e60e75c229b8873a202 100644
@@ -1362,7 +1362,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
 
        cgroup_unlock();
 
-       cgroup_rstat_exit(cgrp);
+       css_rstat_exit(&cgrp->self);
        kernfs_destroy_root(root->kf_root);
        cgroup_free_root(root);
 }
@@ -2136,7 +2136,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
        if (ret)
                goto destroy_root;
 
-       ret = cgroup_rstat_init(root_cgrp);
+       ret = css_rstat_init(&root_cgrp->self);
        if (ret)
                goto destroy_root;
 
@@ -2178,7 +2178,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
        goto out;
 
 exit_stats:
-       cgroup_rstat_exit(root_cgrp);
+       css_rstat_exit(&root_cgrp->self);
 destroy_root:
        kernfs_destroy_root(root->kf_root);
        root->kf_root = NULL;
@@ -5435,7 +5435,7 @@ static void css_free_rwork_fn(struct work_struct *work)
                        cgroup_put(cgroup_parent(cgrp));
                        kernfs_put(cgrp->kn);
                        psi_cgroup_free(cgrp);
-                       cgroup_rstat_exit(cgrp);
+                       css_rstat_exit(css);
                        kfree(cgrp);
                } else {
                        /*
@@ -5465,7 +5465,7 @@ static void css_release_work_fn(struct work_struct *work)
 
                /* css release path */
                if (!list_empty(&css->rstat_css_node)) {
-                       cgroup_rstat_flush(cgrp);
+                       css_rstat_flush(css);
                        list_del_rcu(&css->rstat_css_node);
                }
 
@@ -5493,7 +5493,7 @@ static void css_release_work_fn(struct work_struct *work)
                /* cgroup release path */
                TRACE_CGROUP_PATH(release, cgrp);
 
-               cgroup_rstat_flush(cgrp);
+               css_rstat_flush(css);
 
                spin_lock_irq(&css_set_lock);
                for (tcgrp = cgroup_parent(cgrp); tcgrp;
@@ -5686,17 +5686,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
        if (ret)
                goto out_free_cgrp;
 
-       ret = cgroup_rstat_init(cgrp);
-       if (ret)
-               goto out_cancel_ref;
-
        /* create the directory */
        kn = kernfs_create_dir_ns(parent->kn, name, mode,
                                  current_fsuid(), current_fsgid(),
                                  cgrp, NULL);
        if (IS_ERR(kn)) {
                ret = PTR_ERR(kn);
-               goto out_stat_exit;
+               goto out_cancel_ref;
        }
        cgrp->kn = kn;
 
@@ -5706,6 +5702,14 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
        cgrp->root = root;
        cgrp->level = level;
 
+       /*
+        * Now that init_cgroup_housekeeping() has been called and cgrp->self
+        * is setup, it is safe to perform rstat initialization on it.
+        */
+       ret = css_rstat_init(&cgrp->self);
+       if (ret)
+               goto out_stat_exit;
+
        ret = psi_cgroup_alloc(cgrp);
        if (ret)
                goto out_kernfs_remove;
@@ -5776,10 +5780,10 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
 
 out_psi_free:
        psi_cgroup_free(cgrp);
+out_stat_exit:
+       css_rstat_exit(&cgrp->self);
 out_kernfs_remove:
        kernfs_remove(cgrp->kn);
-out_stat_exit:
-       cgroup_rstat_exit(cgrp);
 out_cancel_ref:
        percpu_ref_exit(&cgrp->self.refcnt);
 out_free_cgrp:
diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c
index 918931f68de35a218b29a7a554f72611c338f80f..4a8834a70ca66933f6a00cb5a664e5cd4877b6a9 100644
@@ -34,9 +34,10 @@ static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
  * operations without handling high-frequency fast-path "update" events.
  */
 static __always_inline
-unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
-                                    struct cgroup *cgrp, const bool fast_path)
+unsigned long _css_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
+                                    struct cgroup_subsys_state *css, const bool fast_path)
 {
+       struct cgroup *cgrp = css->cgroup;
        unsigned long flags;
        bool contended;
 
@@ -67,10 +68,12 @@ unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
 }
 
 static __always_inline
-void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
-                             struct cgroup *cgrp, unsigned long flags,
+void _css_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
+                             struct cgroup_subsys_state *css, unsigned long flags,
                              const bool fast_path)
 {
+       struct cgroup *cgrp = css->cgroup;
+
        if (fast_path)
                trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
        else
@@ -80,16 +83,17 @@ void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
 }
 
 /**
- * cgroup_rstat_updated - keep track of updated rstat_cpu
- * @cgrp: target cgroup
+ * css_rstat_updated - keep track of updated rstat_cpu
+ * @css: target cgroup subsystem state
  * @cpu: cpu on which rstat_cpu was updated
  *
- * @cgrp's rstat_cpu on @cpu was updated.  Put it on the parent's matching
- * rstat_cpu->updated_children list.  See the comment on top of
+ * @css->cgroup's rstat_cpu on @cpu was updated. Put it on the parent's
+ * matching rstat_cpu->updated_children list. See the comment on top of
  * cgroup_rstat_cpu definition for details.
  */
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
 {
+       struct cgroup *cgrp = css->cgroup;
        raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
        unsigned long flags;
 
@@ -104,7 +108,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
        if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
                return;
 
-       flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
+       flags = _css_rstat_cpu_lock(cpu_lock, cpu, css, true);
 
        /* put @cgrp and all ancestors on the corresponding updated lists */
        while (true) {
@@ -132,7 +136,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
                cgrp = parent;
        }
 
-       _cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
+       _css_rstat_cpu_unlock(cpu_lock, cpu, css, flags, true);
 }
 
 /**
@@ -213,7 +217,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
        struct cgroup *head = NULL, *parent, *child;
        unsigned long flags;
 
-       flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);
+       flags = _css_rstat_cpu_lock(cpu_lock, cpu, &root->self, false);
 
        /* Return NULL if this subtree is not on-list */
        if (!rstatc->updated_next)
@@ -250,14 +254,14 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
        if (child != root)
                head = cgroup_rstat_push_children(head, child, cpu);
 unlock_ret:
-       _cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
+       _css_rstat_cpu_unlock(cpu_lock, cpu, &root->self, flags, false);
        return head;
 }
 
 /*
  * A hook for bpf stat collectors to attach to and flush their stats.
- * Together with providing bpf kfuncs for cgroup_rstat_updated() and
- * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
+ * Together with providing bpf kfuncs for css_rstat_updated() and
+ * css_rstat_flush(), this enables a complete workflow where bpf progs that
  * collect cgroup stats can integrate with rstat for efficient flushing.
  *
  * A static noinline declaration here could cause the compiler to optimize away
@@ -285,9 +289,11 @@ __bpf_hook_end();
  * value -1 is used when obtaining the main lock else this is the CPU
  * number processed last.
  */
-static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
+static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
+               int cpu_in_loop)
        __acquires(&cgroup_rstat_lock)
 {
+       struct cgroup *cgrp = css->cgroup;
        bool contended;
 
        contended = !spin_trylock_irq(&cgroup_rstat_lock);
@@ -298,28 +304,32 @@ static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
        trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
 }
 
-static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
+static inline void __css_rstat_unlock(struct cgroup_subsys_state *css,
+                                     int cpu_in_loop)
        __releases(&cgroup_rstat_lock)
 {
+       struct cgroup *cgrp = css->cgroup;
+
        trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
        spin_unlock_irq(&cgroup_rstat_lock);
 }
 
 /**
- * cgroup_rstat_flush - flush stats in @cgrp's subtree
- * @cgrp: target cgroup
+ * css_rstat_flush - flush stats in @css->cgroup's subtree
+ * @css: target cgroup subsystem state
  *
- * Collect all per-cpu stats in @cgrp's subtree into the global counters
+ * Collect all per-cpu stats in @css->cgroup's subtree into the global counters
  * and propagate them upwards.  After this function returns, all cgroups in
  * the subtree have up-to-date ->stat.
  *
- * This also gets all cgroups in the subtree including @cgrp off the
+ * This also gets all cgroups in the subtree including @css->cgroup off the
  * ->updated_children lists.
  *
  * This function may block.
  */
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
+__bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
 {
+       struct cgroup *cgrp = css->cgroup;
        int cpu;
 
        might_sleep();
@@ -327,7 +337,7 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
                struct cgroup *pos;
 
                /* Reacquire for each CPU to avoid disabling IRQs too long */
-               __cgroup_rstat_lock(cgrp, cpu);
+               __css_rstat_lock(css, cpu);
                pos = cgroup_rstat_updated_list(cgrp, cpu);
                for (; pos; pos = pos->rstat_flush_next) {
                        struct cgroup_subsys_state *css;
@@ -341,14 +351,15 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
                                css->ss->css_rstat_flush(css, cpu);
                        rcu_read_unlock();
                }
-               __cgroup_rstat_unlock(cgrp, cpu);
+               __css_rstat_unlock(css, cpu);
                if (!cond_resched())
                        cpu_relax();
        }
 }
 
-int cgroup_rstat_init(struct cgroup *cgrp)
+int css_rstat_init(struct cgroup_subsys_state *css)
 {
+       struct cgroup *cgrp = css->cgroup;
        int cpu;
 
        /* the root cgrp has rstat_cpu preallocated */
@@ -379,11 +390,12 @@ int cgroup_rstat_init(struct cgroup *cgrp)
        return 0;
 }
 
-void cgroup_rstat_exit(struct cgroup *cgrp)
+void css_rstat_exit(struct cgroup_subsys_state *css)
 {
+       struct cgroup *cgrp = css->cgroup;
        int cpu;
 
-       cgroup_rstat_flush(cgrp);
+       css_rstat_flush(&cgrp->self);
 
        /* sanity check */
        for_each_possible_cpu(cpu) {
@@ -490,7 +502,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
                                                 unsigned long flags)
 {
        u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
-       cgroup_rstat_updated(cgrp, smp_processor_id());
+       css_rstat_updated(&cgrp->self, smp_processor_id());
        put_cpu_ptr(rstatbc);
 }
 
@@ -592,12 +604,12 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
        struct cgroup_base_stat bstat;
 
        if (cgroup_parent(cgrp)) {
-               cgroup_rstat_flush(cgrp);
-               __cgroup_rstat_lock(cgrp, -1);
+               css_rstat_flush(&cgrp->self);
+               __css_rstat_lock(&cgrp->self, -1);
                bstat = cgrp->bstat;
                cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
                               &bstat.cputime.utime, &bstat.cputime.stime);
-               __cgroup_rstat_unlock(cgrp, -1);
+               __css_rstat_unlock(&cgrp->self, -1);
        } else {
                root_cgroup_cputime(&bstat);
        }
@@ -619,10 +631,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
        cgroup_force_idle_show(seq, &bstat);
 }
 
-/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
+/* Add bpf kfuncs for css_rstat_updated() and css_rstat_flush() */
 BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, css_rstat_updated)
+BTF_ID_FLAGS(func, css_rstat_flush, KF_SLEEPABLE)
 BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
 
 static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 83c2df73e4b6484806fcd9a5a701b319b22b7ade..7152d9623cc8e9b3158d9066e018c140c8aa2a5a 100644
@@ -579,7 +579,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
        if (!val)
                return;
 
-       cgroup_rstat_updated(memcg->css.cgroup, cpu);
+       css_rstat_updated(&memcg->css, cpu);
        statc = this_cpu_ptr(memcg->vmstats_percpu);
        for (; statc; statc = statc->parent) {
                stats_updates = READ_ONCE(statc->stats_updates) + abs(val);
@@ -611,7 +611,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
        if (mem_cgroup_is_root(memcg))
                WRITE_ONCE(flush_last_time, jiffies_64);
 
-       cgroup_rstat_flush(memcg->css.cgroup);
+       css_rstat_flush(&memcg->css);
 }
 
 /*
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index c74362854948d87da496cbc729db02b1bface2a2..ff189a736ad8c563dcb595e9a98c80ed86c3317e 100644
@@ -37,8 +37,9 @@ struct {
        __type(value, struct attach_counter);
 } attach_counters SEC(".maps");
 
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+               struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
 
 static uint64_t cgroup_id(struct cgroup *cgrp)
 {
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
        else if (create_percpu_attach_counter(cg_id, 1))
                return 0;
 
-       cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+       css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
        return 0;
 }
 
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
                return 1;
 
        /* Flush the stats to make sure we get the most updated numbers */
-       cgroup_rstat_flush(cgrp);
+       css_rstat_flush(&cgrp->self);
 
        total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
        if (!total_counter) {