}
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
+/* we need this in update_cfs_load and load-balance functions below */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
 # ifdef CONFIG_SMP
 static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
                                            int global_update)
        u64 now, delta;
        unsigned long load = cfs_rq->load.weight;
 
-       if (cfs_rq->tg == &root_task_group)
+       if (cfs_rq->tg == &root_task_group || throttled_hierarchy(cfs_rq))
                return;
 
        now = rq_of(cfs_rq)->clock_task;
 
        tg = cfs_rq->tg;
        se = tg->se[cpu_of(rq_of(cfs_rq))];
-       if (!se)
+       if (!se || throttled_hierarchy(cfs_rq))
                return;
 #ifndef CONFIG_SMP
        if (likely(se->load.weight == tg->shares))
        return cfs_rq->throttled;
 }
 
+/* check whether cfs_rq, or any parent, is throttled */
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
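+       /*
+        * tg_throttle_down() increments throttle_count on every cfs_rq in a
+        * throttled sub-tree and tg_unthrottle_up() decrements it again, so a
+        * non-zero count means this cfs_rq or an ancestor is throttled.
+        */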
+       return cfs_rq->throttle_count;
+}
+
+/*
+ * Check whether either of the group entities corresponding to src_cpu or
+ * dest_cpu is a member of a throttled hierarchy, so that group load-balance
+ * operations between them can be skipped.
+ */
+static inline int throttled_lb_pair(struct task_group *tg,
+                                   int src_cpu, int dest_cpu)
+{
+       struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
+
+       src_cfs_rq = tg->cfs_rq[src_cpu];
+       dest_cfs_rq = tg->cfs_rq[dest_cpu];
+
+       return throttled_hierarchy(src_cfs_rq) ||
+              throttled_hierarchy(dest_cfs_rq);
+}
+
+/* an updated child weight may affect the parent, so do this bottom-up */
+static int tg_unthrottle_up(struct task_group *tg, void *data)
+{
+       struct rq *rq = data;
+       struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+       cfs_rq->throttle_count--;
+#ifdef CONFIG_SMP
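+       /* load_stamp/load_last (shares averaging window) exist only on SMP */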
+       if (!cfs_rq->throttle_count) {
+               u64 delta = rq->clock_task - cfs_rq->load_stamp;
+
+               /* leaving throttled state, advance shares averaging windows */
+               cfs_rq->load_stamp += delta;
+               cfs_rq->load_last += delta;
+
+               /* update entity weight now that we are on_rq again */
+               update_cfs_shares(cfs_rq);
+       }
+#endif
+
+       return 0;
+}
+
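+/* invoked top-down by walk_tg_tree_from() when a cfs_rq is being throttled */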
+static int tg_throttle_down(struct task_group *tg, void *data)
+{
+       struct rq *rq = data;
+       struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
+
+       /* group is entering throttled state, record last load */
+       if (!cfs_rq->throttle_count)
+               update_cfs_load(cfs_rq, 0);
+       cfs_rq->throttle_count++;
+
+       return 0;
+}
+
 static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
        struct rq *rq = rq_of(cfs_rq);
        se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
        /* account load preceding throttle */
-       update_cfs_load(cfs_rq, 0);
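+       /*
+        * tg_throttle_down() marks every descendant cfs_rq throttled and takes
+        * a final load sample for those not already throttled; the task_group
+        * tree walk must be RCU protected.
+        */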
+       rcu_read_lock();
+       walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
+       rcu_read_unlock();
 
        task_delta = cfs_rq->h_nr_running;
        for_each_sched_entity(se) {
        list_del_rcu(&cfs_rq->throttled_list);
        raw_spin_unlock(&cfs_b->lock);
 
+       update_rq_clock(rq);
+       /* update hierarchical throttle state; tg_unthrottle_up() reads clock_task */
+       walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
+
        if (!cfs_rq->load.weight)
                return;
 
 {
        return 0;
 }
+
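+/* without bandwidth control nothing is ever throttled */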
+static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
+{
+       return 0;
+}
+
+static inline int throttled_lb_pair(struct task_group *tg,
+                                   int src_cpu, int dest_cpu)
+{
+       return 0;
+}
 #endif
 
 /**************************************************
 
        for_each_leaf_cfs_rq(busiest, cfs_rq) {
                list_for_each_entry_safe(p, n, &cfs_rq->tasks, se.group_node) {
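+                       /*
+                        * all tasks on a cfs_rq are in the same task_group, so
+                        * if this pair is throttled the rest of the list can
+                        * be skipped as well
+                        */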
+                       if (throttled_lb_pair(task_group(p),
+                                             busiest->cpu, this_cpu))
+                               break;
 
                        if (!can_migrate_task(p, busiest, this_cpu,
                                                sd, idle, &pinned))
         * Iterates the task_group tree in a bottom up fashion, see
         * list_add_leaf_cfs_rq() for details.
         */
-       for_each_leaf_cfs_rq(rq, cfs_rq)
+       for_each_leaf_cfs_rq(rq, cfs_rq) {
+               /* throttled entities do not contribute to load */
+               if (throttled_hierarchy(cfs_rq))
+                       continue;
+
                update_shares_cpu(cfs_rq->tg, cpu);
+       }
        rcu_read_unlock();
 }
 
                u64 rem_load, moved_load;
 
                /*
-                * empty group
+                * empty group or part of a throttled hierarchy
                 */
-               if (!busiest_cfs_rq->task_weight)
+               if (!busiest_cfs_rq->task_weight ||
+                   throttled_lb_pair(busiest_cfs_rq->tg, cpu_of(busiest), this_cpu))
                        continue;
 
                rem_load = (u64)rem_load_move * busiest_weight;