]> www.infradead.org Git - users/hch/misc.git/commitdiff
cgroup: cgroup.stat.local time accounting
author Tiffany Yang <ynaffit@google.com>
Fri, 22 Aug 2025 01:37:52 +0000 (18:37 -0700)
committer Tejun Heo <tj@kernel.org>
Fri, 22 Aug 2025 17:50:43 +0000 (07:50 -1000)
There isn't yet a clear way to identify a set of "lost" time that
everyone (or at least a wider group of users) cares about. However,
users can perform some delay accounting by iterating over components of
interest. This patch allows cgroup v2 freezing time to be one of those
components.

Track the cumulative time that each v2 cgroup spends freezing and expose
it to userland via a new local stat file in cgroupfs. Thank you to
Michal, who provided the ASCII art in the updated documentation.

To access this value:
  $ mkdir /sys/fs/cgroup/test
  $ cat /sys/fs/cgroup/test/cgroup.stat.local
  frozen_usec 0

Ensure consistent freeze time reads with freeze_seq, a per-cgroup
sequence counter. Writes are serialized using the css_set_lock.

Signed-off-by: Tiffany Yang <ynaffit@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Michal Koutný <mkoutny@suse.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Documentation/admin-guide/cgroup-v2.rst
include/linux/cgroup-defs.h
kernel/cgroup/cgroup.c
kernel/cgroup/freezer.c

index d9d3cc7df3488e4c63d556d6c7cc52c7c3f23dd0..9a3a909ee40b28c89199a487b152758c05fc42ad 100644 (file)
@@ -1001,6 +1001,24 @@ All cgroup core files are prefixed with "cgroup."
                Total number of dying cgroup subsystems (e.g. memory
                cgroup) at and beneath the current cgroup.
 
+  cgroup.stat.local
+       A read-only flat-keyed file which exists in non-root cgroups.
+       The following entry is defined:
+
+         frozen_usec
+               Cumulative time that this cgroup has spent between freezing and
+               thawing, regardless of whether by self or ancestor groups.
+               NB: whether the "frozen" state is actually reached is not accounted here.
+
+               Using the following ASCII representation of a cgroup's freezer
+               state, ::
+
+                              1    _____
+                       frozen 0 __/     \__
+                                 ab    cd
+
+               the duration being measured is the span between a and c.
+
   cgroup.freeze
        A read-write single value file which exists on non-root cgroups.
        Allowed values are "0" and "1". The default is "0".
index 6b93a64115fe9445d843bc274adbecbc64d53b74..539c64eeef38f3f94d3dc81fc872ed43fc6ef4fe 100644 (file)
@@ -433,6 +433,23 @@ struct cgroup_freezer_state {
         * frozen, SIGSTOPped, and PTRACEd.
         */
        int nr_frozen_tasks;
+
+       /* Freeze time data consistency protection */
+       seqcount_t freeze_seq;
+
+       /*
+        * Most recent time the cgroup was requested to freeze.
+        * Accesses guarded by freeze_seq counter. Writes serialized
+        * by css_set_lock.
+        */
+       u64 freeze_start_nsec;
+
+       /*
+        * Total duration the cgroup has spent freezing.
+        * Accesses guarded by freeze_seq counter. Writes serialized
+        * by css_set_lock.
+        */
+       u64 frozen_nsec;
 };
 
 struct cgroup {
index 312c6a8b55bb73d2780ca4b3ed02669bb3062bb9..ab096b884bbc73fcdabe1c456bad2cd9ebb9f8cc 100644 (file)
@@ -3763,6 +3763,39 @@ static int cgroup_stat_show(struct seq_file *seq, void *v)
        return 0;
 }
 
+/*
+ * Show handler for the "cgroup.stat.local" file.
+ *
+ * Prints one "frozen_usec" entry: the accumulated frozen_nsec plus, while
+ * CGRP_FREEZE is set, the time elapsed since freeze_start_nsec, converted
+ * from nanoseconds to microseconds. Always returns 0.
+ */
+static int cgroup_core_local_stat_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+       unsigned int sequence;
+       u64 freeze_time;
+
+       /* Retry until freeze_seq reports an undisturbed read section. */
+       do {
+               sequence = read_seqcount_begin(&cgrp->freezer.freeze_seq);
+               freeze_time = cgrp->freezer.frozen_nsec;
+               /* Add in current freezer interval if the cgroup is freezing. */
+               if (test_bit(CGRP_FREEZE, &cgrp->flags))
+                       freeze_time += (ktime_get_ns() -
+                                       cgrp->freezer.freeze_start_nsec);
+       } while (read_seqcount_retry(&cgrp->freezer.freeze_seq, sequence));
+
+       /*
+        * Open-coded '/' on a u64 can become a libgcc helper call that does
+        * not link on 32-bit architectures; do_div() divides in place.
+        */
+       do_div(freeze_time, NSEC_PER_USEC);
+       seq_printf(seq, "frozen_usec %llu\n", freeze_time);
+
+       return 0;
+}
+
 #ifdef CONFIG_CGROUP_SCHED
 /**
  * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
@@ -5354,6 +5375,11 @@ static struct cftype cgroup_base_files[] = {
                .name = "cgroup.stat",
                .seq_show = cgroup_stat_show,
        },
+       {
+               .name = "cgroup.stat.local",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = cgroup_core_local_stat_show,
+       },
        {
                .name = "cgroup.freeze",
                .flags = CFTYPE_NOT_ON_ROOT,
@@ -5763,6 +5789,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
         * if the parent has to be frozen, the child has too.
         */
        cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+       seqcount_init(&cgrp->freezer.freeze_seq);
        if (cgrp->freezer.e_freeze) {
                /*
                 * Set the CGRP_FREEZE flag, so when a process will be
@@ -5771,6 +5798,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
                 * consider it frozen immediately.
                 */
                set_bit(CGRP_FREEZE, &cgrp->flags);
+               cgrp->freezer.freeze_start_nsec = ktime_get_ns();
                set_bit(CGRP_FROZEN, &cgrp->flags);
        }
 
index bf1690a167dda0ae474e2417d99d2a5f5b93dca7..6c18854bff34853c4e29e1aa92162ca74813b963 100644 (file)
@@ -171,7 +171,7 @@ static void cgroup_freeze_task(struct task_struct *task, bool freeze)
 /*
  * Freeze or unfreeze all tasks in the given cgroup.
  */
-static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
+static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
 {
        struct css_task_iter it;
        struct task_struct *task;
@@ -179,10 +179,16 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
        lockdep_assert_held(&cgroup_mutex);
 
        spin_lock_irq(&css_set_lock);
-       if (freeze)
+       write_seqcount_begin(&cgrp->freezer.freeze_seq);
+       if (freeze) {
                set_bit(CGRP_FREEZE, &cgrp->flags);
-       else
+               cgrp->freezer.freeze_start_nsec = ts_nsec;
+       } else {
                clear_bit(CGRP_FREEZE, &cgrp->flags);
+               cgrp->freezer.frozen_nsec += (ts_nsec -
+                       cgrp->freezer.freeze_start_nsec);
+       }
+       write_seqcount_end(&cgrp->freezer.freeze_seq);
        spin_unlock_irq(&css_set_lock);
 
        if (freeze)
@@ -260,6 +266,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
        struct cgroup *parent;
        struct cgroup *dsct;
        bool applied = false;
+       u64 ts_nsec;
        bool old_e;
 
        lockdep_assert_held(&cgroup_mutex);
@@ -271,6 +278,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
                return;
 
        cgrp->freezer.freeze = freeze;
+       ts_nsec = ktime_get_ns();
 
        /*
         * Propagate changes downwards the cgroup tree.
@@ -298,7 +306,7 @@ void cgroup_freeze(struct cgroup *cgrp, bool freeze)
                /*
                 * Do change actual state: freeze or unfreeze.
                 */
-               cgroup_do_freeze(dsct, freeze);
+               cgroup_do_freeze(dsct, freeze, ts_nsec);
                applied = true;
        }