called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task, bool threadgroup)
+              struct task_struct *task)
 (cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
 returns an error, this will abort the attach operation. If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
 remain valid while the caller holds cgroup_mutex and it is ensured that either
-attach() or cancel_attach() will be called in future. If threadgroup is
-true, then a successful result indicates that all threads in the given
-thread's threadgroup can be moved together.
+attach() or cancel_attach() will be called in future.
+
+int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
+(cgroup_mutex held by caller)
+
+As can_attach, but for operations that must be run once per task to be
+attached (possibly many when using cgroup_attach_proc). Called after
+can_attach.
 
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task, bool threadgroup)
+              struct task_struct *task)
 This will be called only for subsystems whose can_attach() operation has
 succeeded.
 
+void pre_attach(struct cgroup *cgrp);
+(cgroup_mutex held by caller)
+
+For any non-per-thread attachment work that needs to happen before
+attach_task. Needed by cpuset.
+
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-           struct cgroup *old_cgrp, struct task_struct *task,
-           bool threadgroup)
+           struct cgroup *old_cgrp, struct task_struct *task)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
-If threadgroup is true, the subsystem should take care of all threads
-in the specified thread's threadgroup. Currently does not support any
+
+void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
+(cgroup_mutex held by caller)
+
+As attach, but for operations that must be run once per task to be attached,
+like can_attach_task. Called before attach. Currently does not support any
 subsystem that might need the old_cgrp for every thread in the group.
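
As a rough illustration of how the callbacks above fit together, a
hypothetical subsystem "foo" could wire up the whole set as sketched below.
The foo_* names, the foo_state structure and foo_subsys_id are invented for
this sketch and are not part of this patch; foo_subsys_id would come from the
usual SUBSYS() registration. For each subsystem, can_attach runs once per
operation and can_attach_task once per thread being moved; pre_attach then
runs once, attach_task once per thread, and attach last.

struct foo_state {
        struct cgroup_subsys_state css;
        bool accepting;                 /* illustrative per-cgroup flag */
};

static inline struct foo_state *cgroup_foo(struct cgroup *cgrp)
{
        return container_of(cgroup_subsys_state(cgrp, foo_subsys_id),
                            struct foo_state, css);
}

static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct task_struct *task)
{
        /* Whole-operation check, run once per attach. */
        return cgroup_foo(cgrp)->accepting ? 0 : -EBUSY;
}

static int foo_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* Per-thread check; with cgroup_attach_proc this runs per thread. */
        return (tsk->flags & PF_EXITING) ? -ESRCH : 0;
}

static void foo_pre_attach(struct cgroup *cgrp)
{
        /* Non-per-thread setup shared by the attach_task calls below. */
}

static void foo_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* Per-thread work; returns void, so it cannot fail. */
}

static void foo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct cgroup *old_cgrp, struct task_struct *task)
{
        /* Work that may allocate or block, run once at the end. */
}

struct cgroup_subsys foo_subsys = {
        .name            = "foo",
        .can_attach      = foo_can_attach,
        .can_attach_task = foo_can_attach_task,
        .pre_attach      = foo_pre_attach,
        .attach_task     = foo_attach_task,
        .attach          = foo_attach,
        .subsys_id       = foo_subsys_id,
};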
 
 void fork(struct cgroup_subsys *ss, struct task_struct *task)
 
 
 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
                                                  struct cgroup *);
-static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
-                             struct task_struct *, bool);
-static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
-                          struct cgroup *, struct task_struct *, bool);
+static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *);
+static void blkiocg_attach_task(struct cgroup *, struct task_struct *);
 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
 
 struct cgroup_subsys blkio_subsys = {
        .name = "blkio",
        .create = blkiocg_create,
-       .can_attach = blkiocg_can_attach,
-       .attach = blkiocg_attach,
+       .can_attach_task = blkiocg_can_attach_task,
+       .attach_task = blkiocg_attach_task,
        .destroy = blkiocg_destroy,
        .populate = blkiocg_populate,
 #ifdef CONFIG_BLK_CGROUP
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkiocg_can_attach(struct cgroup_subsys *subsys,
-                               struct cgroup *cgroup, struct task_struct *tsk,
-                               bool threadgroup)
+static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        struct io_context *ioc;
        int ret = 0;
        return ret;
 }
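
The body of blkiocg_can_attach_task is elided from this excerpt. The comment
above says the move is allowed only when the task is the sole owner of its
io_context; a helper expressing that test might look like the sketch below
(the helper name is invented here, and it assumes struct io_context carries
an nr_tasks count, as kernels of this era do).

static int blkiocg_ioc_exclusively_owned(struct task_struct *tsk)
{
        struct io_context *ioc;
        int exclusive = 1;

        /* task_lock() keeps tsk->io_context stable against task exit. */
        task_lock(tsk);
        ioc = tsk->io_context;
        if (ioc && atomic_read(&ioc->nr_tasks) > 1)
                exclusive = 0;  /* io_context is shared with other tasks */
        task_unlock(tsk);

        return exclusive;
}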
 
-static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
-                               struct cgroup *prev, struct task_struct *tsk,
-                               bool threadgroup)
+static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        struct io_context *ioc;
 
 
        int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                         struct task_struct *tsk, bool threadgroup);
+                         struct task_struct *tsk);
+       int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                         struct task_struct *tsk, bool threadgroup);
+                             struct task_struct *tsk);
+       void (*pre_attach)(struct cgroup *cgrp);
+       void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cgrp, struct task_struct *tsk,
-                       bool threadgroup);
+                      struct cgroup *old_cgrp, struct task_struct *tsk);
        void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
        void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                        struct cgroup *old_cgrp, struct task_struct *task);
 
 
        for_each_subsys(root, ss) {
                if (ss->can_attach) {
-                       retval = ss->can_attach(ss, cgrp, tsk, false);
+                       retval = ss->can_attach(ss, cgrp, tsk);
                        if (retval) {
                                /*
                                 * Remember on which subsystem the can_attach()
                                goto out;
                        }
                }
+               if (ss->can_attach_task) {
+                       retval = ss->can_attach_task(cgrp, tsk);
+                       if (retval) {
+                               failed_ss = ss;
+                               goto out;
+                       }
+               }
        }
 
        task_lock(tsk);
        write_unlock(&css_set_lock);
 
        for_each_subsys(root, ss) {
+               if (ss->pre_attach)
+                       ss->pre_attach(cgrp);
+               if (ss->attach_task)
+                       ss->attach_task(cgrp, tsk);
                if (ss->attach)
-                       ss->attach(ss, cgrp, oldcgrp, tsk, false);
+                       ss->attach(ss, cgrp, oldcgrp, tsk);
        }
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
        synchronize_rcu();
                                 */
                                break;
                        if (ss->cancel_attach)
-                               ss->cancel_attach(ss, cgrp, tsk, false);
+                               ss->cancel_attach(ss, cgrp, tsk);
                }
        }
        return retval;
 
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *new_cgroup,
-                             struct task_struct *task, bool threadgroup)
+                             struct task_struct *task)
 {
        struct freezer *freezer;
 
        if (freezer->state != CGROUP_THAWED)
                return -EBUSY;
 
+       return 0;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
        rcu_read_lock();
-       if (__cgroup_freezing_or_frozen(task)) {
+       if (__cgroup_freezing_or_frozen(tsk)) {
                rcu_read_unlock();
                return -EBUSY;
        }
        rcu_read_unlock();
-
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                       if (__cgroup_freezing_or_frozen(c)) {
-                               rcu_read_unlock();
-                               return -EBUSY;
-                       }
-               }
-               rcu_read_unlock();
-       }
-
        return 0;
 }
 
        .populate       = freezer_populate,
        .subsys_id      = freezer_subsys_id,
        .can_attach     = freezer_can_attach,
+       .can_attach_task = freezer_can_attach_task,
+       .pre_attach     = NULL,
+       .attach_task    = NULL,
        .attach         = NULL,
        .fork           = freezer_fork,
        .exit           = NULL,
 
        return val;
 }
 
-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                            struct task_struct *tsk, bool threadgroup)
+                            struct task_struct *tsk)
 {
-       int ret;
        struct cpuset *cs = cgroup_cs(cont);
 
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
 
-       ret = security_task_setscheduler(tsk);
-       if (ret)
-               return ret;
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       ret = security_task_setscheduler(c);
-                       if (ret) {
-                               rcu_read_unlock();
-                               return ret;
-                       }
-               }
-               rcu_read_unlock();
-       }
        return 0;
 }
 
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                              struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+       return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work done before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+       struct cpuset *cs = cgroup_cs(cont);
+
+       if (cs == &top_cpuset)
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       else
+               guarantee_online_cpus(cs, cpus_attach);
+
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
        int err;
+       struct cpuset *cs = cgroup_cs(cont);
+
        /*
         * can_attach beforehand should guarantee that this doesn't fail.
         * TODO: have a better way to handle failure here
        err = set_cpus_allowed_ptr(tsk, cpus_attach);
        WARN_ON_ONCE(err);
 
-       cpuset_change_task_nodemask(tsk, to);
+       cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
        cpuset_update_task_spread_flag(cs, tsk);
-
 }
 
 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                         struct cgroup *oldcont, struct task_struct *tsk,
-                         bool threadgroup)
+                         struct cgroup *oldcont, struct task_struct *tsk)
 {
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-       static nodemask_t to;           /* protected by cgroup_mutex */
 
-       if (cs == &top_cpuset) {
-               cpumask_copy(cpus_attach, cpu_possible_mask);
-       } else {
-               guarantee_online_cpus(cs, cpus_attach);
-       }
-       guarantee_online_mems(cs, &to);
-
-       /* do per-task migration stuff possibly for each in the threadgroup */
-       cpuset_attach_task(tsk, &to, cs);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       cpuset_attach_task(c, &to, cs);
-               }
-               rcu_read_unlock();
-       }
-
-       /* change mm; only needs to be done once even if threadgroup */
-       to = cs->mems_allowed;
+       /*
+        * Change mm, possibly for multiple threads in a threadgroup. This is
+        * expensive and may sleep.
+        */
+       cpuset_attach_nodemask_from = oldcs->mems_allowed;
+       cpuset_attach_nodemask_to = cs->mems_allowed;
        mm = get_task_mm(tsk);
        if (mm) {
-               mpol_rebind_mm(mm, &to);
+               mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                if (is_memory_migrate(cs))
-                       cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+                       cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                         &cpuset_attach_nodemask_to);
                mmput(mm);
        }
 }
        .create = cpuset_create,
        .destroy = cpuset_destroy,
        .can_attach = cpuset_can_attach,
+       .can_attach_task = cpuset_can_attach_task,
+       .pre_attach = cpuset_pre_attach,
+       .attach_task = cpuset_attach_task,
        .attach = cpuset_attach,
        .populate = cpuset_populate,
        .post_clone = cpuset_post_clone,
 
        return 0;
 }
 
-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk, bool threadgroup)
-{
-       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-       if (retval)
-               return retval;
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       retval = cpu_cgroup_can_attach_task(cgrp, c);
-                       if (retval) {
-                               rcu_read_unlock();
-                               return retval;
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return 0;
-}
-
 static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                 struct cgroup *old_cont, struct task_struct *tsk,
-                 bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        sched_move_task(tsk);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       sched_move_task(c);
-               }
-               rcu_read_unlock();
-       }
 }
 
 static void
        .name           = "cpu",
        .create         = cpu_cgroup_create,
        .destroy        = cpu_cgroup_destroy,
-       .can_attach     = cpu_cgroup_can_attach,
-       .attach         = cpu_cgroup_attach,
+       .can_attach_task = cpu_cgroup_can_attach_task,
+       .attach_task    = cpu_cgroup_attach_task,
        .exit           = cpu_cgroup_exit,
        .populate       = cpu_cgroup_populate,
        .subsys_id      = cpu_cgroup_subsys_id,
 
 
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        int ret = 0;
        struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
 
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        mem_cgroup_clear_mc();
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        struct mm_struct *mm;
 
 #else  /* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        return 0;
 }
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
 }
 #endif
 
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task,
-               bool threadgroup)
+               struct cgroup *new_cgroup, struct task_struct *task)
 {
        if (current != task && !capable(CAP_SYS_ADMIN))
                        return -EPERM;