www.infradead.org Git - users/jedix/linux-maple.git/commitdiff
sched/fair: Provide runnable_load_avg back to cfs_rq
author Yuyang Du <yuyang.du@intel.com>
Wed, 15 Jul 2015 00:04:41 +0000 (08:04 +0800)
committer Allen Pais <allen.pais@oracle.com>
Tue, 16 May 2017 04:31:16 +0000 (10:01 +0530)
The cfs_rq's load_avg is composed of runnable_load_avg and blocked_load_avg.
Before this series, sometimes the runnable_load_avg is used, and sometimes
the load_avg is used. Completely replacing all uses of runnable_load_avg
with load_avg may be too big a leap, i.e., there is concern that the
blocked_load_avg would result in an overrated load. Therefore, we bring
runnable_load_avg back.

The new cfs_rq's runnable_load_avg is improved to be updated with all of the
runnable sched_entities at the same time, which solves the problem of one
sched_entity being updated while the others are stale.

Orabug: 25544560

Signed-off-by: Yuyang Du <yuyang.du@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: arjan@linux.intel.com
Cc: bsegall@google.com
Cc: dietmar.eggemann@arm.com
Cc: fengguang.wu@intel.com
Cc: len.brown@intel.com
Cc: morten.rasmussen@arm.com
Cc: pjt@google.com
Cc: rafael.j.wysocki@intel.com
Cc: umgwanakikbuti@gmail.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1436918682-4971-7-git-send-email-yuyang.du@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
(cherry picked from commit 139622343ef31941effc6de6a5a9320371a00e62)

Signed-off-by: Atish Patra <atish.patra@oracle.com>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/sched.h

index aaeda7b0d76d275ab06256e849650d1657ee0af8..a7ddb760cca03ea4f4f3e26c73ba7207a7e33e0c 100644 (file)
@@ -205,6 +205,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #ifdef CONFIG_SMP
        SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
                        cfs_rq->avg.load_avg);
+       SEQ_printf(m, "  .%-30s: %lu\n", "runnable_load_avg",
+                       cfs_rq->runnable_load_avg);
        SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
                        cfs_rq->avg.util_avg);
        SEQ_printf(m, "  .%-30s: %ld\n", "removed_load_avg",
index f1f72c89ae7d11e9196ecb15d70703ac1de2146f..ff183de3e72c3d2de0437d0076c214753312e418 100644 (file)
@@ -2518,7 +2518,7 @@ static u32 __compute_runnable_contrib(u64 n)
  */
 static __always_inline int
 __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
-                 unsigned long weight, int running)
+                 unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
        u64 delta, periods;
        u32 contrib;
@@ -2558,8 +2558,11 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                 * period and accrue it.
                 */
                delta_w = 1024 - delta_w;
-               if (weight)
+               if (weight) {
                        sa->load_sum += weight * delta_w;
+                       if (cfs_rq)
+                               cfs_rq->runnable_load_sum += weight * delta_w;
+               }
                if (running)
                        sa->util_sum += delta_w * scale_freq >> SCHED_CAPACITY_SHIFT;
 
@@ -2570,19 +2573,29 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
                delta %= 1024;
 
                sa->load_sum = decay_load(sa->load_sum, periods + 1);
+               if (cfs_rq) {
+                       cfs_rq->runnable_load_sum =
+                               decay_load(cfs_rq->runnable_load_sum, periods + 1);
+               }
                sa->util_sum = decay_load((u64)(sa->util_sum), periods + 1);
 
                /* Efficiently calculate \sum (1..n_period) 1024*y^i */
                contrib = __compute_runnable_contrib(periods);
-               if (weight)
+               if (weight) {
                        sa->load_sum += weight * contrib;
+                       if (cfs_rq)
+                               cfs_rq->runnable_load_sum += weight * contrib;
+               }
                if (running)
                        sa->util_sum += contrib * scale_freq >> SCHED_CAPACITY_SHIFT;
        }
 
        /* Remainder of delta accrued against u_0` */
-       if (weight)
+       if (weight) {
                sa->load_sum += weight * delta;
+               if (cfs_rq)
+                       cfs_rq->runnable_load_sum += weight * delta;
+       }
        if (running)
                sa->util_sum += delta * scale_freq >> SCHED_CAPACITY_SHIFT;
 
@@ -2590,6 +2603,10 @@ __update_load_avg(u64 now, int cpu, struct sched_avg *sa,
 
        if (decayed) {
                sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX);
+               if (cfs_rq) {
+                       cfs_rq->runnable_load_avg =
+                               div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX);
+               }
                sa->util_avg = (sa->util_sum << SCHED_LOAD_SHIFT) / LOAD_AVG_MAX;
        }
 
@@ -2637,7 +2654,7 @@ static inline int update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
        }
 
        decayed = __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-               scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL);
+               scale_load_down(cfs_rq->load.weight), cfs_rq->curr != NULL, cfs_rq);
 
 #ifndef CONFIG_64BIT
        smp_wmb();
@@ -2659,7 +2676,7 @@ static inline void update_load_avg(struct sched_entity *se, int update_tg)
         * track group sched_entity load average for task_h_load calc in migration
         */
        __update_load_avg(now, cpu, &se->avg,
-               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
 
        if (update_cfs_rq_load_avg(now, cfs_rq) && update_tg)
                update_tg_load_avg(cfs_rq, 0);
@@ -2679,11 +2696,15 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
        }
        else {
                __update_load_avg(now, cpu_of(rq_of(cfs_rq)), sa,
-                       se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+                       se->on_rq * scale_load_down(se->load.weight),
+                       cfs_rq->curr == se, NULL);
        }
 
        decayed = update_cfs_rq_load_avg(now, cfs_rq);
 
+       cfs_rq->runnable_load_avg += sa->load_avg;
+       cfs_rq->runnable_load_sum += sa->load_sum;
+
        if (migrated) {
                cfs_rq->avg.load_avg += sa->load_avg;
                cfs_rq->avg.load_sum += sa->load_sum;
@@ -2695,6 +2716,18 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
                update_tg_load_avg(cfs_rq, 0);
 }
 
+/* Remove the runnable load generated by se from cfs_rq's runnable load average */
+static inline void
+dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       update_load_avg(se, 1);
+
+       cfs_rq->runnable_load_avg =
+               max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
+       cfs_rq->runnable_load_sum =
+               max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+}
+
 /*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
@@ -2716,7 +2749,7 @@ void remove_entity_load_avg(struct sched_entity *se)
        last_update_time = cfs_rq->avg.last_update_time;
 #endif
 
-       __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0);
+       __update_load_avg(last_update_time, cpu_of(rq_of(cfs_rq)), &se->avg, 0, 0, NULL);
        atomic_long_add(se->avg.load_avg, &cfs_rq->removed_load_avg);
        atomic_long_add(se->avg.util_avg, &cfs_rq->removed_util_avg);
 }
@@ -2746,6 +2779,8 @@ static int idle_balance(struct rq *this_rq);
 static inline void update_load_avg(struct sched_entity *se, int update_tg) {}
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
+static inline void
+dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
 static inline int idle_balance(struct rq *rq)
@@ -2953,7 +2988,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         * Update run-time statistics of the 'current'.
         */
        update_curr(cfs_rq);
-       update_load_avg(se, 1);
+       dequeue_entity_load_avg(cfs_rq, se);
 
        update_stats_dequeue(cfs_rq, se);
        if (flags & DEQUEUE_SLEEP) {
@@ -7881,7 +7916,7 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
 #ifdef CONFIG_SMP
        /* Catch up with the cfs_rq and remove our load when we leave */
        __update_load_avg(cfs_rq->avg.last_update_time, cpu_of(rq), &se->avg,
-               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se);
+               se->on_rq * scale_load_down(se->load.weight), cfs_rq->curr == se, NULL);
 
        cfs_rq->avg.load_avg =
                max_t(long, cfs_rq->avg.load_avg - se->avg.load_avg, 0);
index 923811461a37bdcc7df5590c664f2d9ea776c639..0efdc31cc51975a3ab302c28cff8e9194cbc442c 100644 (file)
@@ -372,6 +372,8 @@ struct cfs_rq {
         * CFS load tracking
         */
        struct sched_avg avg;
+       u64 runnable_load_sum;
+       unsigned long runnable_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
        unsigned long tg_load_avg_contrib;
 #endif