return rq->avg_load_per_task;
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-/*
- * Compute the cpu's hierarchical load factor for each task group.
- * This needs to be done in a top-down fashion because the load of a child
- * group is a fraction of its parents load.
- */
-static int tg_load_down(struct task_group *tg, void *data)
-{
-       unsigned long load;
-       long cpu = (long)data;
-
-       if (!tg->parent) {
-               load = cpu_rq(cpu)->load.weight;
-       } else {
-               load = tg->parent->cfs_rq[cpu]->h_load;
-               load *= tg->se[cpu]->load.weight;
-               load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
-       }
-
-       tg->cfs_rq[cpu]->h_load = load;
-
-       return 0;
-}
-
-static void update_h_load(long cpu)
-{
-       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
-}
-
-#endif
-
 #ifdef CONFIG_PREEMPT
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
 
        struct rq *rq = cpu_rq(cpu);
 
        rcu_read_lock();
+       /*
+        * Iterates the task_group tree in a bottom-up fashion, see
+        * list_add_leaf_cfs_rq() for details.
+        */
        for_each_leaf_cfs_rq(rq, cfs_rq)
                update_shares_cpu(cfs_rq->tg, cpu);
        rcu_read_unlock();
 }
 
+/*
+ * Compute the cpu's hierarchical load factor for each task group.
+ * This needs to be done in a top-down fashion because the load of a child
+ * group is a fraction of its parent's load.
+ *
+ * @tg:   task group whose per-cpu cfs_rq h_load is being (re)computed.
+ * @data: cpu number carried in the opaque pointer argument.
+ *
+ * Always returns 0.
+ * NOTE(review): presumably 0 tells walk_tg_tree() to keep walking; confirm
+ * against walk_tg_tree().
+ */
+static int tg_load_down(struct task_group *tg, void *data)
+{
+       unsigned long load;
+       /* The caller cast the cpu number into the pointer; cast it back. */
+       long cpu = (long)data;
+
+       if (!tg->parent) {
+               /* No parent: start from the runqueue's total load weight. */
+               load = cpu_rq(cpu)->load.weight;
+       } else {
+               /*
+                * Scale the parent's h_load by this group's entity weight
+                * relative to the weight of the parent's cfs_rq on this cpu.
+                * NOTE(review): the "+ 1" presumably guards against dividing
+                * by a zero weight; confirm.
+                */
+               load = tg->parent->cfs_rq[cpu]->h_load;
+               load *= tg->se[cpu]->load.weight;
+               load /= tg->parent->cfs_rq[cpu]->load.weight + 1;
+       }
+
+       tg->cfs_rq[cpu]->h_load = load;
+
+       return 0;
+}
+
+/*
+ * Refresh h_load for the task groups' cfs_rqs on @cpu by walking the
+ * task-group tree with tg_load_down() as the first callback and tg_nop as
+ * the second. The cpu number is handed to the callbacks through the opaque
+ * pointer argument.
+ * NOTE(review): presumably the first callback runs on the way down the
+ * tree and the second on the way back up; confirm against walk_tg_tree().
+ */
+static void update_h_load(long cpu)
+{
+       walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
+}
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                  unsigned long max_load_move,
                  int *all_pinned)
 {
        long rem_load_move = max_load_move;
-       int busiest_cpu = cpu_of(busiest);
-       struct task_group *tg;
+       struct cfs_rq *busiest_cfs_rq;
 
        rcu_read_lock();
-       update_h_load(busiest_cpu);
+       update_h_load(cpu_of(busiest));
 
-       list_for_each_entry_rcu(tg, &task_groups, list) {
-               struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
+       for_each_leaf_cfs_rq(busiest, busiest_cfs_rq) {
                unsigned long busiest_h_load = busiest_cfs_rq->h_load;
                unsigned long busiest_weight = busiest_cfs_rq->load.weight;
                u64 rem_load, moved_load;