From 64ca179f2df9b65b101abd13c8b3a19cb05e9cba Mon Sep 17 00:00:00 2001 From: Yuyang Du Date: Wed, 15 Jul 2015 08:04:41 +0800 Subject: [PATCH] sched/fair: Provide runnable_load_avg back to cfs_rq The cfs_rq's load_avg is composed of runnable_load_avg and blocked_load_avg. Before this series, sometimes the runnable_load_avg is used, and sometimes the load_avg is used. Completely replacing all uses of runnable_load_avg with load_avg may be too big a leap, i.e., the blocked_load_avg is concerned to result in overrated load. Therefore, we get runnable_load_avg back. The new cfs_rq's runnable_load_avg is improved to be updated with all of the runnable sched_entities at the same time, so the one sched_entity updated and the others stale problem is solved. Orabug: 25544560 Signed-off-by: Yuyang Du Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arjan@linux.intel.com Cc: bsegall@google.com Cc: dietmar.eggemann@arm.com Cc: fengguang.wu@intel.com Cc: len.brown@intel.com Cc: morten.rasmussen@arm.com Cc: pjt@google.com Cc: rafael.j.wysocki@intel.com Cc: umgwanakikbuti@gmail.com Cc: vincent.guittot@linaro.org Link: http://lkml.kernel.org/r/1436918682-4971-7-git-send-email-yuyang.du@intel.com Signed-off-by: Ingo Molnar (cherry picked from commit 139622343ef31941effc6de6a5a9320371a00e62) Signed-off-by: Atish Patra Signed-off-by: Allen Pais --- kernel/sched/debug.c | 2 ++ kernel/sched/fair.c | 55 ++++++++++++++++++++++++++++++++++++-------- kernel/sched/sched.h | 2 ++ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index aaeda7b0d76d..a7ddb760cca0 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -205,6 +205,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #ifdef CONFIG_SMP SEQ_printf(m, " .%-30s: %lu\n", "load_avg", cfs_rq->avg.load_avg); + SEQ_printf(m, " .%-30s: %lu\n", "runnable_load_avg", + 
cfs_rq->runnable_load_avg); SEQ_printf(m, " .%-30s: %lu\n", "util_avg", cfs_rq->avg.util_avg); SEQ_printf(m, " .%-30s: %ld\n", "removed_load_avg", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f1f72c89ae7d..ff183de3e72c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2518,7 +2518,7 @@ static u32 __compute_runnable_contrib(u64 n) */ static __always_inline int __update_load_avg(u64 now, int cpu, struct sched_avg *sa, - unsigned long weight, int running) + unsigned long weight, int running, struct cfs_rq *cfs_rq) { u64 delta, periods; u32 contrib; @@ -2558,8 +2558,11 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, * period and accrue it. */ delta_w = 1024 - delta_w; - if (weight) + if (weight) { sa->load_sum += weight * delta_w; + if (cfs_rq) + cfs_rq->runnable_load_sum += weight * delta_w; + } if (running) sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT; @@ -2570,19 +2573,29 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, delta %= 1024; sa->load_sum = decay_load(sa->load_sum, periods + 1); + if (cfs_rq) { + cfs_rq->runnable_load_sum = + decay_load(cfs_rq->runnable_load_sum, periods + 1); + } sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1); /* Efficiently calculate \sum (1..n_period) 1024*y^i */ contrib = __compute_runnable_contrib(periods); - if (weight) + if (weight) { sa->load_sum += weight * contrib; + if (cfs_rq) + cfs_rq->runnable_load_sum += weight * contrib; + } if (running) sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT; } /* Remainder of delta accrued against u_0` */ - if (weight) + if (weight) { sa->load_sum += weight * delta; + if (cfs_rq) + cfs_rq->runnable_load_sum += weight * delta; + } if (running) sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT; @@ -2590,6 +2603,10 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa, if (decayed) { sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX); + if (cfs_rq) { + cfs_rq->runnable_load_avg = + 
div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX); + } sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX; } @@ -2637,7 +2654,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) } decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, - scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL); + scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq); #ifndef CONFIG_64BIT smp_wmb(); @@ -2659,7 +2676,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg) * track group sched_entity load average for task_h_load calc in migration */ __update_load_avg(now, cpu, &se->avg, - se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); + se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL); if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg) update_tg_load_avg(cfs_rq, 0); @@ -2679,11 +2696,15 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) } else { __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa, - se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); + se->on_rq * scale_load_down(se->load.weight), + cfs_rq->curr == se, NULL); } decayed = update_cfs_rq_load_avg(now, cfs_rq); + cfs_rq->runnable_load_avg += sa->load_avg; + cfs_rq->runnable_load_sum += sa->load_sum; + if (migrated) { cfs_rq->avg.load_avg += sa->load_avg; cfs_rq->avg.load_sum += sa->load_sum; @@ -2695,6 +2716,18 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) update_tg_load_avg(cfs_rq, 0); } +/* Remove the runnable load generated by se from cfs_rq's runnable load average */ +static inline void +dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + update_load_avg(se, 1); + + cfs_rq->runnable_load_avg = + max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0); + cfs_rq->runnable_load_sum = + max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0); +} + /* * Task first catches up with cfs_rq, and then 
subtract * itself from the cfs_rq (task must be off the queue now). @@ -2716,7 +2749,7 @@ void remove_entity_load_avg(struct sched_entity *se) last_update_time = cfs_rq->avg.last_update_time; #endif - __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0); + __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL); atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg); atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg); } @@ -2746,6 +2779,8 @@ static int idle_balance(struct rq *this_rq); static inline void update_load_avg(struct sched_entity *se, int update_tg) {} static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} +static inline void +dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {} static inline void remove_entity_load_avg(struct sched_entity *se) {} static inline int idle_balance(struct rq *rq) @@ -2953,7 +2988,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) * Update run-time statistics of the 'current'. 
*/ update_curr(cfs_rq); - update_load_avg(se, 1); + dequeue_entity_load_avg(cfs_rq, se); update_stats_dequeue(cfs_rq, se); if (flags & DEQUEUE_SLEEP) { @@ -7881,7 +7916,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p) #ifdef CONFIG_SMP /* Catch up with the cfs_rq and remove our load when we leave */ __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg, - se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se); + se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL); cfs_rq->avg.load_avg = max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 923811461a37..0efdc31cc519 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -372,6 +372,8 @@ struct cfs_rq { * CFS load tracking */ struct sched_avg avg; + u64 runnable_load_sum; + unsigned long runnable_load_avg; #ifdef CONFIG_FAIR_GROUP_SCHED unsigned long tg_load_avg_contrib; #endif -- 2.50.1