SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
 }
 
-/* Iterate through all cfs_rq's on a runqueue in bottom-up order */
-#define for_each_leaf_cfs_rq(rq, cfs_rq) \
-       list_for_each_entry_rcu(cfs_rq, &rq->leaf_cfs_rq_list, leaf_cfs_rq_list)
+/* Iterate thr' all leaf cfs_rq's on a runqueue */
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)                     \
+       list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list,    \
+                                leaf_cfs_rq_list)
 
 /* Do the two (enqueued) entities belong to the same group ? */
 static inline struct cfs_rq *
 {
 }
 
-#define for_each_leaf_cfs_rq(rq, cfs_rq)       \
-               for (cfs_rq = &rq->cfs; cfs_rq; cfs_rq = NULL)
+#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos)     \
+               for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)
 
 static inline struct sched_entity *parent_entity(struct sched_entity *se)
 {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
+static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
+{
+       if (cfs_rq->load.weight)
+               return false;
+
+       if (cfs_rq->avg.load_sum)
+               return false;
+
+       if (cfs_rq->avg.util_sum)
+               return false;
+
+       if (cfs_rq->avg.runnable_load_sum)
+               return false;
+
+       return true;
+}
+
 static void update_blocked_averages(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       struct cfs_rq *cfs_rq;
+       struct cfs_rq *cfs_rq, *pos;
        const struct sched_class *curr_class;
        struct rq_flags rf;
        bool done = true;
         * Iterates the task_group tree in a bottom up fashion, see
         * list_add_leaf_cfs_rq() for details.
         */
-       for_each_leaf_cfs_rq(rq, cfs_rq) {
+       for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
                struct sched_entity *se;
 
                if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq))
                if (se && !skip_blocked_update(se))
                        update_load_avg(cfs_rq_of(se), se, 0);
 
+               /*
+                * There can be a lot of idle CPU cgroups.  Don't let fully
+                * decayed cfs_rqs linger on the list.
+                */
+               if (cfs_rq_is_decayed(cfs_rq))
+                       list_del_leaf_cfs_rq(cfs_rq);
+
                /* Don't need periodic decay once load/util_avg are null */
                if (cfs_rq_has_blocked(cfs_rq))
                        done = false;
 #ifdef CONFIG_SCHED_DEBUG
 void print_cfs_stats(struct seq_file *m, int cpu)
 {
-       struct cfs_rq *cfs_rq;
+       struct cfs_rq *cfs_rq, *pos;
 
        rcu_read_lock();
-       for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+       for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
                print_cfs_rq(m, cpu, cfs_rq);
        rcu_read_unlock();
 }