{
        struct sched_group *idlest = NULL, *group = sd->groups;
        struct sched_group *most_spare_sg = NULL;
-       unsigned long min_load = ULONG_MAX, this_load = 0;
+       unsigned long min_runnable_load = ULONG_MAX, this_runnable_load = 0;
+       unsigned long min_avg_load = ULONG_MAX, this_avg_load = 0;
        unsigned long most_spare = 0, this_spare = 0;
        int load_idx = sd->forkexec_idx;
-       int imbalance = 100 + (sd->imbalance_pct-100)/2;
+       int imbalance_scale = 100 + (sd->imbalance_pct-100)/2;
+       unsigned long imbalance = scale_load_down(NICE_0_LOAD) *
+                               (sd->imbalance_pct-100) / 100;
 
        if (sd_flag & SD_BALANCE_WAKE)
                load_idx = sd->wake_idx;
 
        do {
-               unsigned long load, avg_load, spare_cap, max_spare_cap;
+               unsigned long load, avg_load, runnable_load;
+               unsigned long spare_cap, max_spare_cap;
                int local_group;
                int i;
 
                local_group = cpumask_test_cpu(this_cpu,
                                               sched_group_cpus(group));
 
                /*
                 * Tally up the load of all CPUs in the group and find
                 * the group containing the CPU with most spare capacity.
                 */
                avg_load = 0;
+               runnable_load = 0;
                max_spare_cap = 0;
 
                for_each_cpu(i, sched_group_cpus(group)) {
                        /* Bias balancing toward cpus of our domain */
                        if (local_group)
                                load = source_load(i, load_idx);
                        else
                                load = target_load(i, load_idx);
 
-                       avg_load += load;
+                       runnable_load += load;
+
+                       avg_load += cfs_rq_load_avg(&cpu_rq(i)->cfs);
 
                        spare_cap = capacity_spare_wake(i, p);
 
                        if (spare_cap > max_spare_cap)
                                max_spare_cap = spare_cap;
                }
 
                /* Adjust by relative CPU capacity of the group */
-               avg_load = (avg_load * SCHED_CAPACITY_SCALE) / group->sgc->capacity;
+               avg_load = (avg_load * SCHED_CAPACITY_SCALE) /
+                                       group->sgc->capacity;
+               runnable_load = (runnable_load * SCHED_CAPACITY_SCALE) /
+                                       group->sgc->capacity;
 
                if (local_group) {
-                       this_load = avg_load;
+                       this_runnable_load = runnable_load;
+                       this_avg_load = avg_load;
                        this_spare = max_spare_cap;
                } else {
-                       if (avg_load < min_load) {
-                               min_load = avg_load;
+                       if (min_runnable_load > (runnable_load + imbalance)) {
+                               /*
+                                * The runnable load is significantly smaller
+                                * so we can pick this new cpu
+                                */
+                               min_runnable_load = runnable_load;
+                               min_avg_load = avg_load;
+                               idlest = group;
+                       } else if ((runnable_load < (min_runnable_load + imbalance)) &&
+                                  (100*min_avg_load > imbalance_scale*avg_load)) {
+                               /*
+                                * The runnable loads are close so take the
+                                * blocked load into account through avg_load.
+                                */
+                               min_avg_load = avg_load;
                                idlest = group;
                        }
 
                        if (most_spare < max_spare_cap) {
                                most_spare = max_spare_cap;
                                most_spare_sg = group;
                        }
                }
        } while (group = group->next, group != sd->groups);
 
        /*
         * The cross-over point between using spare capacity or least load
         * is too conservative for threaded workloads when it comes to
         * kernel hackbench.
         */
        if (sd->flags & SD_ASYM_CPUCAPACITY)
                goto skip_spare;
 
        if (this_spare > task_util(p) / 2 &&
-           imbalance*this_spare > 100*most_spare)
+           imbalance_scale*this_spare > 100*most_spare)
                return NULL;
-       else if (most_spare > task_util(p) / 2)
+
+       if (most_spare > task_util(p) / 2)
                return most_spare_sg;
 
 skip_spare:
-       if (!idlest || 100*this_load < imbalance*min_load)
+       if (!idlest)
+               return NULL;
+
+       if (min_runnable_load > (this_runnable_load + imbalance))
                return NULL;
+
+       if ((this_runnable_load < (min_runnable_load + imbalance)) &&
+            (100*this_avg_load < imbalance_scale*min_avg_load))
+               return NULL;
+
        return idlest;
 }
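
With the typical imbalance_pct of 125, the two thresholds introduced above work
out to imbalance_scale = 100 + (125-100)/2 = 112 and, since
scale_load_down(NICE_0_LOAD) is 1024, imbalance = 1024 * 25 / 100 = 256 load
units. The following is a minimal userspace sketch of the resulting two-step
comparison, assuming those values; group_stats and pick_idlest are hypothetical
names used only for illustration, not kernel API:

#include <stdio.h>

#define NICE_0_LOAD     1024    /* scale_load_down(NICE_0_LOAD) on 64-bit */

struct group_stats {
        const char *name;
        unsigned long runnable_load;    /* capacity-adjusted runnable load */
        unsigned long avg_load;         /* cfs_rq load_avg, includes blocked load */
};

/*
 * Mirror the patched selection: the candidate replaces the current idlest
 * only when its runnable load is smaller by more than 'imbalance', or when
 * the runnable loads are close and its avg_load is smaller by more than
 * the imbalance_scale margin. Hypothetical helper, not a kernel function.
 */
static const struct group_stats *
pick_idlest(const struct group_stats *cand, const struct group_stats *cur,
            unsigned int imbalance_pct)
{
        unsigned int imbalance_scale = 100 + (imbalance_pct - 100) / 2;
        unsigned long imbalance = NICE_0_LOAD * (imbalance_pct - 100) / 100;

        /* The candidate's runnable load is significantly smaller. */
        if (cur->runnable_load > cand->runnable_load + imbalance)
                return cand;

        /* Runnable loads are close: let blocked load decide. */
        if (cand->runnable_load < cur->runnable_load + imbalance &&
            100 * cur->avg_load > imbalance_scale * cand->avg_load)
                return cand;

        return cur;
}

int main(void)
{
        /*
         * Both groups show similar runnable load (512 vs 600, within the
         * 256-unit margin), but B still carries blocked load from recently
         * sleeping tasks, so A is preferred.
         */
        struct group_stats a = { "A", 512, 300 };
        struct group_stats b = { "B", 600, 700 };

        printf("picked group %s\n", pick_idlest(&a, &b, 125)->name);
        return 0;
}

The extra avg_load pass matters because runnable load alone cannot tell a
group whose tasks just went to sleep from a genuinely idle one; folding
cfs_rq_load_avg() into the tie-break steers a forking or waking task away
from groups that are about to get busy again.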