void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-       s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-               spread, rq0_min_vruntime, spread0;
+       s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread;
+       struct sched_entity *last, *first;
        struct rq *rq = cpu_rq(cpu);
-       struct sched_entity *last;
        unsigned long flags;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
                        SPLIT_NS(cfs_rq->exec_clock));
 
        raw_spin_rq_lock_irqsave(rq, flags);
-       if (rb_first_cached(&cfs_rq->tasks_timeline))
-               MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
+       first = __pick_first_entity(cfs_rq);
+       if (first)
+               left_vruntime = first->vruntime;
        last = __pick_last_entity(cfs_rq);
        if (last)
-               max_vruntime = last->vruntime;
+               right_vruntime = last->vruntime;
        min_vruntime = cfs_rq->min_vruntime;
-       rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
        raw_spin_rq_unlock_irqrestore(rq, flags);
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
-                       SPLIT_NS(MIN_vruntime));
+
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
+                       SPLIT_NS(left_vruntime));
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
                        SPLIT_NS(min_vruntime));
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
-                       SPLIT_NS(max_vruntime));
-       spread = max_vruntime - MIN_vruntime;
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
-                       SPLIT_NS(spread));
-       spread0 = min_vruntime - rq0_min_vruntime;
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
-                       SPLIT_NS(spread0));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+                       SPLIT_NS(avg_vruntime(cfs_rq)));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
+                       SPLIT_NS(right_vruntime));
+       spread = right_vruntime - left_vruntime;
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
                        cfs_rq->nr_spread_over);
        SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
 
        return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       return (s64)(se->vruntime - cfs_rq->min_vruntime);
+}
+
 #define __node_2_se(node) \
        rb_entry((node), struct sched_entity, run_node)
 
+/*
+ * Compute virtual time from the per-task service numbers:
+ *
+ * Fair schedulers conserve lag:
+ *
+ *   \Sum lag_i = 0
+ *
+ * Where lag_i is given by:
+ *
+ *   lag_i = S - s_i = w_i * (V - v_i)
+ *
+ * Where S is the ideal service time and V is its virtual time counterpart.
+ * Therefore:
+ *
+ *   \Sum lag_i = 0
+ *   \Sum w_i * (V - v_i) = 0
+ *   \Sum w_i * V - w_i * v_i = 0
+ *
+ * From which we can solve an expression for V in v_i (which we have in
+ * se->vruntime):
+ *
+ *       \Sum v_i * w_i   \Sum v_i * w_i
+ *   V = -------------- = --------------
+ *          \Sum w_i            W
+ *
+ * Specifically, this is the weighted average of all entity virtual runtimes.
+ *
+ * [[ NOTE: this is only equal to the ideal scheduler under the condition
+ *          that join/leave operations happen at lag_i = 0, otherwise the
+ *          virtual time has non-contiguous motion equivalent to:
+ *
+ *           V +-= lag_i / W
+ *
+ *         Also see the comment in place_entity() that deals with this. ]]
+ *
+ * However, since v_i is u64 and the multiplication could easily overflow,
+ * transform it into a relative form that uses smaller quantities:
+ *
+ * Substitute: v_i == (v_i - v0) + v0
+ *
+ *     \Sum ((v_i - v0) + v0) * w_i   \Sum (v_i - v0) * w_i
+ * V = ---------------------------- = --------------------- + v0
+ *                  W                            W
+ *
+ * Which we track using:
+ *
+ *                    v0 := cfs_rq->min_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ *              \Sum w_i := cfs_rq->avg_load
+ *
+ * Since min_vruntime is a monotonically increasing variable that closely
+ * tracks the per-task service, these deltas (v_i - v0) will be in the order
+ * of the maximal (virtual) lag induced in the system due to quantisation.
+ *
+ * Also, we use scale_load_down() to reduce the size.
+ *
+ * As measured, the max (key * weight) value was ~44 bits for a kernel build.
+ */
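
As a quick sanity check of the lag identity in the comment above, here is a
small standalone userspace sketch (not part of the patch; the vruntime and
weight values are made up). It verifies \Sum lag_i = 0 at
V = \Sum v_i*w_i / W using exact integer arithmetic:

/* toy check of \Sum lag_i = 0; compile with any C compiler, run in userspace */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* made-up vruntimes and weights for three entities */
	long long v[] = { 100, 220, 400 };
	long long w[] = {   3,   1,   2 };
	long long sum_vw = 0, W = 0, lag_sum_scaled = 0;
	int i;

	for (i = 0; i < 3; i++) {
		sum_vw += v[i] * w[i];
		W += w[i];
	}

	/*
	 * V = sum_vw / W; keep it as an exact fraction and scale every
	 * lag_i = w_i * (V - v_i) by W so the check stays in integers.
	 */
	for (i = 0; i < 3; i++)
		lag_sum_scaled += w[i] * (sum_vw - v[i] * W);

	assert(lag_sum_scaled == 0);	/* \Sum lag_i == 0, exactly */
	printf("V = %lld/%lld, lag sum = 0\n", sum_vw, W);
	return 0;
}
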
+static void
+avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       unsigned long weight = scale_load_down(se->load.weight);
+       s64 key = entity_key(cfs_rq, se);
+
+       cfs_rq->avg_vruntime += key * weight;
+       cfs_rq->avg_load += weight;
+}
+
+static void
+avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       unsigned long weight = scale_load_down(se->load.weight);
+       s64 key = entity_key(cfs_rq, se);
+
+       cfs_rq->avg_vruntime -= key * weight;
+       cfs_rq->avg_load -= weight;
+}
+
+static inline
+void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+{
+       /*
+        * v0' = v0 + d ==> avg_vruntime' = avg_vruntime - d*avg_load
+        *
+        * Every key (v_i - v0) shrinks by d, so the tracked weighted sum
+        * drops by d * \Sum w_i = d * avg_load.
+        */
+       cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+}
+
+u64 avg_vruntime(struct cfs_rq *cfs_rq)
+{
+       struct sched_entity *curr = cfs_rq->curr;
+       s64 avg = cfs_rq->avg_vruntime;
+       long load = cfs_rq->avg_load;
+
+       if (curr && curr->on_rq) {
+               unsigned long weight = scale_load_down(curr->load.weight);
+
+               avg += entity_key(cfs_rq, curr) * weight;
+               load += weight;
+       }
+
+       if (load)
+               avg = div_s64(avg, load);
+
+       return cfs_rq->min_vruntime + avg;
+}
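
To make the relative-form bookkeeping concrete, here is a standalone userspace
sketch (not part of the patch) that mirrors avg_vruntime_add() and the read
side of avg_vruntime(), including folding the untracked current entity back
in. The struct, helpers and values below are made up for illustration, and
scale_load_down()/div_s64() are skipped:

#include <assert.h>
#include <stdio.h>

struct toy_rq {
	long long v0;		/* plays the role of cfs_rq->min_vruntime */
	long long avg_vruntime;	/* \Sum (v_i - v0) * w_i over queued entities */
	long long avg_load;	/* \Sum w_i over queued entities */
};

static void toy_add(struct toy_rq *rq, long long v, long long w)
{
	rq->avg_vruntime += (v - rq->v0) * w;
	rq->avg_load += w;
}

/* read side: fold the (untracked) current entity back in, like avg_vruntime() */
static long long toy_avg(struct toy_rq *rq, long long curr_v, long long curr_w)
{
	long long avg = rq->avg_vruntime + (curr_v - rq->v0) * curr_w;
	long long load = rq->avg_load + curr_w;

	return rq->v0 + avg / load;	/* plain division instead of div_s64() */
}

int main(void)
{
	struct toy_rq rq = { .v0 = 1000 };
	/* "curr" sits outside the tree, so outside the aggregates */
	long long curr_v = 1010, curr_w = 2;

	toy_add(&rq, 1030, 1);
	toy_add(&rq, 1090, 3);

	/* direct computation: (1010*2 + 1030*1 + 1090*3) / 6 = 6320/6 ~= 1053 */
	assert(toy_avg(&rq, curr_v, curr_w) == 1053);
	printf("V = %lld\n", toy_avg(&rq, curr_v, curr_w));
	return 0;
}
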
+
+static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+{
+       u64 min_vruntime = cfs_rq->min_vruntime;
+       /*
+        * open coded max_vruntime() to allow updating avg_vruntime
+        */
+       s64 delta = (s64)(vruntime - min_vruntime);
+       if (delta > 0) {
+               avg_vruntime_update(cfs_rq, delta);
+               min_vruntime = vruntime;
+       }
+       return min_vruntime;
+}
+
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
        struct sched_entity *curr = cfs_rq->curr;
 
        /* ensure we never gain time by being placed backwards. */
        u64_u32_store(cfs_rq->min_vruntime,
-                     max_vruntime(cfs_rq->min_vruntime, vruntime));
+                     __update_min_vruntime(cfs_rq, vruntime));
 }
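
The rebase in avg_vruntime_update()/__update_min_vruntime() can be checked in
isolation: advancing the reference point v0 by delta shrinks every key by
delta, so the tracked sum must drop by delta * avg_load for the recovered
average to stay put. A standalone userspace sketch with made-up values (not
part of the patch):

#include <assert.h>

int main(void)
{
	/* two made-up entities tracked relative to v0 */
	long long v[] = { 2040, 2100 };
	long long w[] = {    1,    3 };
	long long v0 = 2000, avg = 0, load = 0, delta = 25;
	long long before, after;
	int i;

	for (i = 0; i < 2; i++) {
		avg += (v[i] - v0) * w[i];
		load += w[i];
	}
	before = v0 + avg / load;

	/* advance v0 by delta, mirroring avg_vruntime_update() */
	v0 += delta;
	avg -= delta * load;
	after = v0 + avg / load;

	/* the recovered weighted average is unchanged by the rebase */
	assert(before == after);
	return 0;
}
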
 
 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+       avg_vruntime_add(cfs_rq, se);
        rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
+       avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
                /* commit outstanding execution time */
                if (cfs_rq->curr == se)
                        update_curr(cfs_rq);
+               else
+                       avg_vruntime_sub(cfs_rq, se);
                update_load_sub(&cfs_rq->load, se->load.weight);
        }
        dequeue_load_avg(cfs_rq, se);
 #endif
 
        enqueue_load_avg(cfs_rq, se);
-       if (se->on_rq)
+       if (se->on_rq) {
                update_load_add(&cfs_rq->load, se->load.weight);
-
+               if (cfs_rq->curr != se)
+                       avg_vruntime_add(cfs_rq, se);
+       }
 }
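
The reweight hunk above has to pull the entity's old (key * weight)
contribution out of the aggregate before the weight changes and add the new
contribution back afterwards; otherwise avg_vruntime would keep charging the
stale weight. A standalone userspace sketch of that invariant with made-up
values (not part of the patch):

#include <assert.h>

int main(void)
{
	long long v0 = 0;
	long long avg_vruntime = 0, avg_load = 0;	/* the tracked sums */
	long long v = 500, w = 2, new_w = 5;

	/* the entity and one other queued entity enter the aggregates */
	avg_vruntime += (v - v0) * w;
	avg_load += w;
	avg_vruntime += (700 - v0) * 1;
	avg_load += 1;

	/* reweight: drop the stale contribution, then re-add with new_w */
	avg_vruntime -= (v - v0) * w;
	avg_load -= w;
	w = new_w;
	avg_vruntime += (v - v0) * w;
	avg_load += w;

	/* the aggregates now describe weights {5, 1}, not the stale {2, 1} */
	assert(avg_load == 6);
	assert(avg_vruntime == 500 * 5 + 700 * 1);
	return 0;
}
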
 
 void reweight_task(struct task_struct *p, int prio)