{
        unsigned long load, max_cpu_load, min_cpu_load, max_nr_running;
        int i;
-       unsigned int balance_cpu = -1, first_idle_cpu = 0;
+       unsigned int balance_cpu = -1;
+       unsigned long balance_load = ~0UL;
        unsigned long avg_load_per_task = 0;
 
        if (local_group)
 
                /* Bias balancing toward cpus of our domain */
                if (local_group) {
-                       if (idle_cpu(i) && !first_idle_cpu) {
-                               first_idle_cpu = 1;
+                       load = target_load(i, load_idx);
+                       if (load < balance_load || idle_cpu(i)) {
+                               balance_load = load;
                                balance_cpu = i;
                        }
-
-                       load = target_load(i, load_idx);
                } else {
                        load = source_load(i, load_idx);
                        if (load > max_cpu_load) {