}
 
 /*
- * compute_energy(): Estimates the energy that @pd would consume if @p was
- * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
- * landscape of @pd's CPUs after the task migration, and uses the Energy Model
- * to compute what would be the energy if we decided to actually migrate that
- * task.
+ * energy_env - Utilization landscape for energy estimation.
+ * @task_busy_time: Utilization contribution of the task whose placement is
+ *                  being evaluated. Given by eenv_task_busy_time().
+ * @pd_busy_time:   Utilization of the whole perf domain without the task
+ *                  contribution. Given by eenv_pd_busy_time().
+ * @cpu_cap:        Maximum CPU capacity for the perf domain.
+ * @pd_cap:         Entire perf domain capacity (@cpu_cap * number of CPUs).
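+ *
+ * task_busy_time is computed once per wakeup, since it doesn't depend on
+ * the candidate CPU; pd_busy_time, cpu_cap and pd_cap are refreshed for
+ * each perf domain under evaluation.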
+ */
+struct energy_env {
+       unsigned long task_busy_time;
+       unsigned long pd_busy_time;
+       unsigned long cpu_cap;
+       unsigned long pd_cap;
+};
+
+/*
+ * Compute the task busy time for compute_energy(). This time cannot be
+ * injected directly into effective_cpu_util() because of IRQ scaling:
+ * scaling by the IRQ time is only meaningful on the CPU where the task has
+ * most recently run, since IRQ pressure is accounted per CPU.
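+ *
+ * E.g. (hypothetical numbers): with max_cap = 1024, irq = 256 and
+ * task_util_est(p) = 200, scale_irq_capacity() yields
+ * 200 * (1024 - 256) / 1024 = 150.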
  */
-static long
-compute_energy(struct task_struct *p, int dst_cpu, struct cpumask *cpus,
-              struct perf_domain *pd)
+static inline void eenv_task_busy_time(struct energy_env *eenv,
+                                      struct task_struct *p, int prev_cpu)
 {
-       unsigned long max_util = 0, sum_util = 0, cpu_cap;
+       unsigned long busy_time, max_cap = arch_scale_cpu_capacity(prev_cpu);
+       unsigned long irq = cpu_util_irq(cpu_rq(prev_cpu));
+
+       if (unlikely(irq >= max_cap))
+               busy_time = max_cap;
+       else
+               busy_time = scale_irq_capacity(task_util_est(p), irq, max_cap);
+
+       eenv->task_busy_time = busy_time;
+}
+
+/*
+ * Compute the perf_domain (PD) busy time for compute_energy(), based on the
+ * utilization of each CPU in @pd_cpus. Utilization clamping is not applied
+ * here: the ratio (utilization / cpu_capacity) is already enough to scale
+ * the EM reported power consumption at the (possibly clamped) cpu_capacity.
+ *
+ * The contribution of the task @p for which we want to estimate the
+ * energy cost is removed (by cpu_util_next()) and must be calculated
+ * separately (see eenv_task_busy_time()). This ensures:
+ *
+ *   - A stable PD utilization, no matter which CPU of that PD we want to place
+ *     the task on.
+ *
+ *   - A fair comparison between CPUs as the task contribution (task_util())
+ *     will always be the same no matter which CPU utilization we rely on
+ *     (util_avg or util_est).
+ *
+ * Sets @eenv->pd_busy_time for the PD that spans @pd_cpus. This busy time
+ * can't exceed @eenv->pd_cap.
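+ *
+ * E.g. (hypothetical numbers): for a PD of two CPUs whose effective
+ * utilizations, without @p's contribution, are 300 and 200, pd_busy_time
+ * is 500 (clamped to pd_cap).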
+ */
+static inline void eenv_pd_busy_time(struct energy_env *eenv,
+                                    struct cpumask *pd_cpus,
+                                    struct task_struct *p)
+{
+       unsigned long busy_time = 0;
        int cpu;
 
-       cpu_cap = arch_scale_cpu_capacity(cpumask_first(cpus));
-       cpu_cap -= arch_scale_thermal_pressure(cpumask_first(cpus));
+       for_each_cpu(cpu, pd_cpus) {
+               unsigned long util = cpu_util_next(cpu, p, -1);
 
-       /*
-        * The capacity state of CPUs of the current rd can be driven by CPUs
-        * of another rd if they belong to the same pd. So, account for the
-        * utilization of these CPUs too by masking pd with cpu_online_mask
-        * instead of the rd span.
-        *
-        * If an entire pd is outside of the current rd, it will not appear in
-        * its pd list and will not be accounted by compute_energy().
-        */
-       for_each_cpu(cpu, cpus) {
-               unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu);
-               unsigned long cpu_util, util_running = util_freq;
-               struct task_struct *tsk = NULL;
+               busy_time += effective_cpu_util(cpu, util, ENERGY_UTIL, NULL);
+       }
 
-               /*
-                * When @p is placed on @cpu:
-                *
-                * util_running = max(cpu_util, cpu_util_est) +
-                *                max(task_util, _task_util_est)
-                *
-                * while cpu_util_next is: max(cpu_util + task_util,
-                *                             cpu_util_est + _task_util_est)
-                */
-               if (cpu == dst_cpu) {
-                       tsk = p;
-                       util_running =
-                               cpu_util_next(cpu, p, -1) + task_util_est(p);
-               }
+       eenv->pd_busy_time = min(eenv->pd_cap, busy_time);
+}
 
-               /*
-                * Busy time computation: utilization clamping is not
-                * required since the ratio (sum_util / cpu_capacity)
-                * is already enough to scale the EM reported power
-                * consumption at the (eventually clamped) cpu_capacity.
-                */
-               cpu_util = effective_cpu_util(cpu, util_running, ENERGY_UTIL,
-                                             NULL);
+/*
+ * Compute the maximum utilization for compute_energy() when the task @p
+ * is placed on CPU @dst_cpu.
+ *
+ * Returns the maximum utilization among @pd_cpus. This utilization can't
+ * exceed @eenv->cpu_cap.
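+ *
+ * E.g. (hypothetical numbers): with two CPUs at frequency-relevant
+ * utilizations 300 and 700, max_util is 700; the busiest CPU drives the
+ * frequency, and hence the perf state cost, for the whole PD.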
+ */
+static inline unsigned long
+eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+                struct task_struct *p, int dst_cpu)
+{
+       unsigned long max_util = 0;
+       int cpu;
 
-               sum_util += min(cpu_util, cpu_cap);
+       for_each_cpu(cpu, pd_cpus) {
+               struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
+               unsigned long util = cpu_util_next(cpu, p, dst_cpu);
+               unsigned long cpu_util;
 
                /*
                 * Performance domain frequency: utilization clamping
                 * NOTE: in case RT tasks are running, by default the
                 * FREQUENCY_UTIL's utilization can be max OPP.
                 */
-               cpu_util = effective_cpu_util(cpu, util_freq, FREQUENCY_UTIL,
-                                             tsk);
-               max_util = max(max_util, min(cpu_util, cpu_cap));
+               cpu_util = effective_cpu_util(cpu, util, FREQUENCY_UTIL, tsk);
+               max_util = max(max_util, cpu_util);
        }
 
-       return em_cpu_energy(pd->em_pd, max_util, sum_util, cpu_cap);
+       return min(max_util, eenv->cpu_cap);
+}
+
+/*
+ * compute_energy(): Use the Energy Model to estimate the energy that @pd would
+ * consume for a given utilization landscape @eenv. When @dst_cpu < 0, the task
+ * contribution is ignored.
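+ *
+ * em_cpu_energy() then returns, roughly, cost(max_util) * busy_time /
+ * scale_cpu, where cost(max_util) is the cost of the cheapest perf state
+ * able to serve @max_util (see include/linux/energy_model.h for the exact
+ * expression).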
+ */
+static inline unsigned long
+compute_energy(struct energy_env *eenv, struct perf_domain *pd,
+              struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
+{
+       unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+       unsigned long busy_time = eenv->pd_busy_time;
+
+       if (dst_cpu >= 0)
+               busy_time = min(eenv->pd_cap, busy_time + eenv->task_busy_time);
+
+       return em_cpu_energy(pd->em_pd, max_util, busy_time, eenv->cpu_cap);
 }
 
 /*
 {
        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
        unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
-       struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
        int cpu, best_energy_cpu = prev_cpu, target = -1;
-       unsigned long cpu_cap, util, base_energy = 0;
+       struct root_domain *rd = this_rq()->rd;
+       unsigned long base_energy = 0;
        struct sched_domain *sd;
        struct perf_domain *pd;
+       struct energy_env eenv;
 
        rcu_read_lock();
        pd = rcu_dereference(rd->pd);
        if (!task_util_est(p))
                goto unlock;
 
+       eenv_task_busy_time(&eenv, p, prev_cpu);
+
        for (; pd; pd = pd->next) {
-               unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+               unsigned long cpu_cap, cpu_thermal_cap, util;
+               unsigned long cur_delta, max_spare_cap = 0;
                bool compute_prev_delta = false;
                unsigned long base_energy_pd;
                int max_spare_cap_cpu = -1;
 
                cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
 
-               for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) {
+               if (cpumask_empty(cpus))
+                       continue;
+
+               /* Account thermal pressure for the energy estimation */
+               cpu = cpumask_first(cpus);
+               cpu_thermal_cap = arch_scale_cpu_capacity(cpu);
+               cpu_thermal_cap -= arch_scale_thermal_pressure(cpu);
+
+               eenv.cpu_cap = cpu_thermal_cap;
+               eenv.pd_cap = 0;
+
+               for_each_cpu(cpu, cpus) {
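+                       /*
+                        * pd_cap accounts every online CPU of the PD, even
+                        * those filtered out below as placement candidates.
+                        */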
+                       eenv.pd_cap += cpu_thermal_cap;
+
+                       if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
+                               continue;
+
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
 
                        util = cpu_util_next(cpu, p, cpu);
                        cpu_cap = capacity_of(cpu);
-                       spare_cap = cpu_cap;
-                       lsub_positive(&spare_cap, util);
 
                        /*
                         * Skip CPUs that cannot satisfy the capacity request.
                         */
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
+                       lsub_positive(&cpu_cap, util);
+
                        if (cpu == prev_cpu) {
                                /* Always use prev_cpu as a candidate. */
                                compute_prev_delta = true;
-                       } else if (spare_cap > max_spare_cap) {
+                       } else if (cpu_cap > max_spare_cap) {
                                /*
                                 * Find the CPU with the maximum spare capacity
                                 * in the performance domain.
                                 */
-                               max_spare_cap = spare_cap;
+                               max_spare_cap = cpu_cap;
                                max_spare_cap_cpu = cpu;
                        }
                }
                if (max_spare_cap_cpu < 0 && !compute_prev_delta)
                        continue;
 
+               eenv_pd_busy_time(&eenv, cpus, p);
                /* Compute the 'base' energy of the pd, without @p */
-               base_energy_pd = compute_energy(p, -1, cpus, pd);
+               base_energy_pd = compute_energy(&eenv, pd, cpus, p, -1);
                base_energy += base_energy_pd;
 
                /* Evaluate the energy impact of using prev_cpu. */
                if (compute_prev_delta) {
-                       prev_delta = compute_energy(p, prev_cpu, cpus, pd);
+                       prev_delta = compute_energy(&eenv, pd, cpus, p,
+                                                   prev_cpu);
+                       /* CPU utilization has changed concurrently: the estimate is stale */
                        if (prev_delta < base_energy_pd)
                                goto unlock;
                        prev_delta -= base_energy_pd;
                        best_delta = min(best_delta, prev_delta);
                }
 
                /* Evaluate the energy impact of using max_spare_cap_cpu. */
                if (max_spare_cap_cpu >= 0) {
-                       cur_delta = compute_energy(p, max_spare_cap_cpu, cpus,
-                                                  pd);
+                       cur_delta = compute_energy(&eenv, pd, cpus, p,
+                                                  max_spare_cap_cpu);
+                       /* CPU utilization has changed concurrently: the estimate is stale */
                        if (cur_delta < base_energy_pd)
                                goto unlock;
                        cur_delta -= base_energy_pd;