called multiple times against a cgroup.
 
 int can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task, bool threadgroup)
+              struct task_struct *task)
 (cgroup_mutex held by caller)
 
 Called prior to moving a task into a cgroup; if the subsystem
 returns an error, this will abort the attach operation. If a NULL
 task is passed, then a successful result indicates that *any*
 unspecified task can be moved into the cgroup. Note that this isn't
 called on a fork. If this method returns 0 (success) then this should
 remain valid while the caller holds cgroup_mutex and it is ensured that either
-attach() or cancel_attach() will be called in future. If threadgroup is
-true, then a successful result indicates that all threads in the given
-thread's threadgroup can be moved together.
+attach() or cancel_attach() will be called in future.
+
+int can_attach_task(struct cgroup *cgrp, struct task_struct *tsk);
+(cgroup_mutex held by caller)
+
+As can_attach, but for operations that must be run once per task to be
+attached (possibly many when using cgroup_attach_proc). Called after
+can_attach.
 
 void cancel_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-              struct task_struct *task, bool threadgroup)
+              struct task_struct *task)
 This will be called only for subsystems whose can_attach() operation has
 succeeded.
 
+void pre_attach(struct cgroup *cgrp);
+(cgroup_mutex held by caller)
+
+For any non-per-thread attachment work that needs to happen before
+attach_task. Needed by cpuset.
+
 void attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-           struct cgroup *old_cgrp, struct task_struct *task,
-           bool threadgroup)
+           struct cgroup *old_cgrp, struct task_struct *task)
 (cgroup_mutex held by caller)
 
 Called after the task has been attached to the cgroup, to allow any
 post-attachment activity that requires memory allocations or blocking.
-If threadgroup is true, the subsystem should take care of all threads
-in the specified thread's threadgroup. Currently does not support any
+
+void attach_task(struct cgroup *cgrp, struct task_struct *tsk);
+(cgroup_mutex held by caller)
+
+As attach, but for operations that must be run once per task to be attached,
+like can_attach_task. Called before attach. Currently does not support any
 subsystem that might need the old_cgrp for every thread in the group.
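
As a rough illustration of how the callbacks above fit together, a
hypothetical subsystem "foo" could wire up the whole set as sketched below.
The foo_* names, the foo_state structure and foo_subsys_id are invented for
this sketch and are not part of this patch; foo_subsys_id would come from the
usual SUBSYS() registration. For each subsystem, can_attach runs once per
operation and can_attach_task once per thread being moved; pre_attach then
runs once, attach_task once per thread, and attach last.

struct foo_state {
        struct cgroup_subsys_state css;
        bool accepting;                 /* illustrative per-cgroup flag */
};

static inline struct foo_state *cgroup_foo(struct cgroup *cgrp)
{
        return container_of(cgroup_subsys_state(cgrp, foo_subsys_id),
                            struct foo_state, css);
}

static int foo_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                          struct task_struct *task)
{
        /* Whole-operation check, run once per attach. */
        return cgroup_foo(cgrp)->accepting ? 0 : -EBUSY;
}

static int foo_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* Per-thread check; with cgroup_attach_proc this runs per thread. */
        return (tsk->flags & PF_EXITING) ? -ESRCH : 0;
}

static void foo_pre_attach(struct cgroup *cgrp)
{
        /* Non-per-thread setup shared by the attach_task calls below. */
}

static void foo_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
{
        /* Per-thread work; returns void, so it cannot fail. */
}

static void foo_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct cgroup *old_cgrp, struct task_struct *task)
{
        /* Work that may allocate or block, run once at the end. */
}

struct cgroup_subsys foo_subsys = {
        .name            = "foo",
        .can_attach      = foo_can_attach,
        .can_attach_task = foo_can_attach_task,
        .pre_attach      = foo_pre_attach,
        .attach_task     = foo_attach_task,
        .attach          = foo_attach,
        .subsys_id       = foo_subsys_id,
};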
 
 void fork(struct cgroup_subsys *ss, struct task_struct *task)
 
 
 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
                                                  struct cgroup *);
-static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
-                             struct task_struct *, bool);
-static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
-                          struct cgroup *, struct task_struct *, bool);
+static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *);
+static void blkiocg_attach_task(struct cgroup *, struct task_struct *);
 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
 
 struct cgroup_subsys blkio_subsys = {
        .name = "blkio",
        .create = blkiocg_create,
-       .can_attach = blkiocg_can_attach,
-       .attach = blkiocg_attach,
+       .can_attach_task = blkiocg_can_attach_task,
+       .attach_task = blkiocg_attach_task,
        .destroy = blkiocg_destroy,
        .populate = blkiocg_populate,
 #ifdef CONFIG_BLK_CGROUP
  * of the main cic data structures.  For now we allow a task to change
  * its cgroup only if it's the only owner of its ioc.
  */
-static int blkiocg_can_attach(struct cgroup_subsys *subsys,
-                               struct cgroup *cgroup, struct task_struct *tsk,
-                               bool threadgroup)
+static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        struct io_context *ioc;
        int ret = 0;
        return ret;
 }
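
The body of blkiocg_can_attach_task is elided from this excerpt. The comment
above says the move is allowed only when the task is the sole owner of its
io_context; a helper expressing that test might look like the sketch below
(the helper name is invented here, and it assumes struct io_context carries
an nr_tasks count, as kernels of this era do).

static int blkiocg_ioc_exclusively_owned(struct task_struct *tsk)
{
        struct io_context *ioc;
        int exclusive = 1;

        /* task_lock() keeps tsk->io_context stable against task exit. */
        task_lock(tsk);
        ioc = tsk->io_context;
        if (ioc && atomic_read(&ioc->nr_tasks) > 1)
                exclusive = 0;  /* io_context is shared with other tasks */
        task_unlock(tsk);

        return exclusive;
}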
 
-static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
-                               struct cgroup *prev, struct task_struct *tsk,
-                               bool threadgroup)
+static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        struct io_context *ioc;
 
 
        int (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
        int (*can_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                         struct task_struct *tsk, bool threadgroup);
+                         struct task_struct *tsk);
+       int (*can_attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*cancel_attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                         struct task_struct *tsk, bool threadgroup);
+                             struct task_struct *tsk);
+       void (*pre_attach)(struct cgroup *cgrp);
+       void (*attach_task)(struct cgroup *cgrp, struct task_struct *tsk);
        void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                       struct cgroup *old_cgrp, struct task_struct *tsk,
-                       bool threadgroup);
+                      struct cgroup *old_cgrp, struct task_struct *tsk);
        void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
        void (*exit)(struct cgroup_subsys *ss, struct cgroup *cgrp,
                        struct cgroup *old_cgrp, struct task_struct *task);
 
 
        for_each_subsys(root, ss) {
                if (ss->can_attach) {
-                       retval = ss->can_attach(ss, cgrp, tsk, false);
+                       retval = ss->can_attach(ss, cgrp, tsk);
                        if (retval) {
                                /*
                                 * Remember on which subsystem the can_attach()
                                goto out;
                        }
                }
+               if (ss->can_attach_task) {
+                       retval = ss->can_attach_task(cgrp, tsk);
+                       if (retval) {
+                               failed_ss = ss;
+                               goto out;
+                       }
+               }
        }
 
        task_lock(tsk);
        write_unlock(&css_set_lock);
 
        for_each_subsys(root, ss) {
+               if (ss->pre_attach)
+                       ss->pre_attach(cgrp);
+               if (ss->attach_task)
+                       ss->attach_task(cgrp, tsk);
                if (ss->attach)
-                       ss->attach(ss, cgrp, oldcgrp, tsk, false);
+                       ss->attach(ss, cgrp, oldcgrp, tsk);
        }
        set_bit(CGRP_RELEASABLE, &oldcgrp->flags);
        synchronize_rcu();
                                 */
                                break;
                        if (ss->cancel_attach)
-                               ss->cancel_attach(ss, cgrp, tsk, false);
+                               ss->cancel_attach(ss, cgrp, tsk);
                }
        }
        return retval;
 
  */
 static int freezer_can_attach(struct cgroup_subsys *ss,
                              struct cgroup *new_cgroup,
-                             struct task_struct *task, bool threadgroup)
+                             struct task_struct *task)
 {
        struct freezer *freezer;
 
        if (freezer->state != CGROUP_THAWED)
                return -EBUSY;
 
+       return 0;
+}
+
+static int freezer_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
+{
        rcu_read_lock();
-       if (__cgroup_freezing_or_frozen(task)) {
+       if (__cgroup_freezing_or_frozen(tsk)) {
                rcu_read_unlock();
                return -EBUSY;
        }
        rcu_read_unlock();
-
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &task->thread_group, thread_group) {
-                       if (__cgroup_freezing_or_frozen(c)) {
-                               rcu_read_unlock();
-                               return -EBUSY;
-                       }
-               }
-               rcu_read_unlock();
-       }
-
        return 0;
 }
 
        .populate       = freezer_populate,
        .subsys_id      = freezer_subsys_id,
        .can_attach     = freezer_can_attach,
+       .can_attach_task = freezer_can_attach_task,
+       .pre_attach     = NULL,
+       .attach_task    = NULL,
        .attach         = NULL,
        .fork           = freezer_fork,
        .exit           = NULL,
 
        return val;
 }
 
-/* Protected by cgroup_lock */
-static cpumask_var_t cpus_attach;
-
 /* Called by cgroups to determine if a cpuset is usable; cgroup_mutex held */
 static int cpuset_can_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                            struct task_struct *tsk, bool threadgroup)
+                            struct task_struct *tsk)
 {
-       int ret;
        struct cpuset *cs = cgroup_cs(cont);
 
        if (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
        if (tsk->flags & PF_THREAD_BOUND)
                return -EINVAL;
 
-       ret = security_task_setscheduler(tsk);
-       if (ret)
-               return ret;
-       if (threadgroup) {
-               struct task_struct *c;
-
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       ret = security_task_setscheduler(c);
-                       if (ret) {
-                               rcu_read_unlock();
-                               return ret;
-                       }
-               }
-               rcu_read_unlock();
-       }
        return 0;
 }
 
-static void cpuset_attach_task(struct task_struct *tsk, nodemask_t *to,
-                              struct cpuset *cs)
+static int cpuset_can_attach_task(struct cgroup *cgrp, struct task_struct *task)
+{
+       return security_task_setscheduler(task);
+}
+
+/*
+ * Protected by cgroup_lock. The nodemasks must be stored globally because
+ * dynamically allocating them is not allowed in pre_attach, and they must
+ * persist among pre_attach, attach_task, and attach.
+ */
+static cpumask_var_t cpus_attach;
+static nodemask_t cpuset_attach_nodemask_from;
+static nodemask_t cpuset_attach_nodemask_to;
+
+/* Set-up work done before attaching each task. */
+static void cpuset_pre_attach(struct cgroup *cont)
+{
+       struct cpuset *cs = cgroup_cs(cont);
+
+       if (cs == &top_cpuset)
+               cpumask_copy(cpus_attach, cpu_possible_mask);
+       else
+               guarantee_online_cpus(cs, cpus_attach);
+
+       guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+}
+
+/* Per-thread attachment work. */
+static void cpuset_attach_task(struct cgroup *cont, struct task_struct *tsk)
 {
        int err;
+       struct cpuset *cs = cgroup_cs(cont);
+
        /*
         * can_attach beforehand should guarantee that this doesn't fail.
         * TODO: have a better way to handle failure here
        err = set_cpus_allowed_ptr(tsk, cpus_attach);
        WARN_ON_ONCE(err);
 
-       cpuset_change_task_nodemask(tsk, to);
+       cpuset_change_task_nodemask(tsk, &cpuset_attach_nodemask_to);
        cpuset_update_task_spread_flag(cs, tsk);
-
 }
 
 static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
-                         struct cgroup *oldcont, struct task_struct *tsk,
-                         bool threadgroup)
+                         struct cgroup *oldcont, struct task_struct *tsk)
 {
        struct mm_struct *mm;
        struct cpuset *cs = cgroup_cs(cont);
        struct cpuset *oldcs = cgroup_cs(oldcont);
-       static nodemask_t to;           /* protected by cgroup_mutex */
 
-       if (cs == &top_cpuset) {
-               cpumask_copy(cpus_attach, cpu_possible_mask);
-       } else {
-               guarantee_online_cpus(cs, cpus_attach);
-       }
-       guarantee_online_mems(cs, &to);
-
-       /* do per-task migration stuff possibly for each in the threadgroup */
-       cpuset_attach_task(tsk, &to, cs);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       cpuset_attach_task(c, &to, cs);
-               }
-               rcu_read_unlock();
-       }
-
-       /* change mm; only needs to be done once even if threadgroup */
-       to = cs->mems_allowed;
+       /*
+        * Change mm, possibly for multiple threads in a threadgroup. This is
+        * expensive and may sleep.
+        */
+       cpuset_attach_nodemask_from = oldcs->mems_allowed;
+       cpuset_attach_nodemask_to = cs->mems_allowed;
        mm = get_task_mm(tsk);
        if (mm) {
-               mpol_rebind_mm(mm, &to);
+               mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
                if (is_memory_migrate(cs))
-                       cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
+                       cpuset_migrate_mm(mm, &cpuset_attach_nodemask_from,
+                                         &cpuset_attach_nodemask_to);
                mmput(mm);
        }
 }
        .create = cpuset_create,
        .destroy = cpuset_destroy,
        .can_attach = cpuset_can_attach,
+       .can_attach_task = cpuset_can_attach_task,
+       .pre_attach = cpuset_pre_attach,
+       .attach_task = cpuset_attach_task,
        .attach = cpuset_attach,
        .populate = cpuset_populate,
        .post_clone = cpuset_post_clone,
 
        return 0;
 }
 
-static int
-cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                     struct task_struct *tsk, bool threadgroup)
-{
-       int retval = cpu_cgroup_can_attach_task(cgrp, tsk);
-       if (retval)
-               return retval;
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       retval = cpu_cgroup_can_attach_task(cgrp, c);
-                       if (retval) {
-                               rcu_read_unlock();
-                               return retval;
-                       }
-               }
-               rcu_read_unlock();
-       }
-       return 0;
-}
-
 static void
-cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
-                 struct cgroup *old_cont, struct task_struct *tsk,
-                 bool threadgroup)
+cpu_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
        sched_move_task(tsk);
-       if (threadgroup) {
-               struct task_struct *c;
-               rcu_read_lock();
-               list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
-                       sched_move_task(c);
-               }
-               rcu_read_unlock();
-       }
 }
 
 static void
        .name           = "cpu",
        .create         = cpu_cgroup_create,
        .destroy        = cpu_cgroup_destroy,
-       .can_attach     = cpu_cgroup_can_attach,
-       .attach         = cpu_cgroup_attach,
+       .can_attach_task = cpu_cgroup_can_attach_task,
+       .attach_task    = cpu_cgroup_attach_task,
        .exit           = cpu_cgroup_exit,
        .populate       = cpu_cgroup_populate,
        .subsys_id      = cpu_cgroup_subsys_id,
 
 
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        int ret = 0;
        struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
 
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        mem_cgroup_clear_mc();
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        struct mm_struct *mm;
 
 #else  /* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
        return 0;
 }
 static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
                                struct cgroup *cgroup,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
 }
 static void mem_cgroup_move_task(struct cgroup_subsys *ss,
                                struct cgroup *cont,
                                struct cgroup *old_cont,
-                               struct task_struct *p,
-                               bool threadgroup)
+                               struct task_struct *p)
 {
 }
 #endif
 
 struct cgroup_subsys devices_subsys;
 
 static int devcgroup_can_attach(struct cgroup_subsys *ss,
-               struct cgroup *new_cgroup, struct task_struct *task,
-               bool threadgroup)
+               struct cgroup *new_cgroup, struct task_struct *task)
 {
        if (current != task && !capable(CAP_SYS_ADMIN))
                        return -EPERM;