#include "blk.h"
 #include "blk-mq.h"
 
-static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
-                             const int cpu)
+static int cpu_to_queue_index(unsigned int nr_queues, const int cpu,
+                             const struct cpumask *online_mask)
 {
-       return cpu * nr_queues / nr_cpus;
+       /*
+        * A CPU that is not set in online_mask is mapped to queue index 0.
+        * Any other CPU is mapped to cpu % nr_queues, so consecutive CPU
+        * numbers cycle through the queue indexes.
+        *
+        * NOTE(review): nr_queues == 0 would make the modulo divide by
+        * zero; presumably callers always pass at least one queue - confirm.
+        */
+       if (!cpumask_test_cpu(cpu, online_mask))
+               return 0;
+       return cpu % nr_queues;
 }
 
 static int get_first_sibling(unsigned int cpu)
        unsigned int *map = set->mq_map;
        unsigned int nr_queues = set->nr_hw_queues;
        const struct cpumask *online_mask = cpu_online_mask;
-       unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling;
-       cpumask_var_t cpus;
-
-       if (!alloc_cpumask_var(&cpus, GFP_ATOMIC))
-               return -ENOMEM;
-
-       cpumask_clear(cpus);
-       nr_cpus = nr_uniq_cpus = 0;
-       for_each_cpu(i, online_mask) {
-               nr_cpus++;
-               first_sibling = get_first_sibling(i);
-               if (!cpumask_test_cpu(first_sibling, cpus))
-                       nr_uniq_cpus++;
-               cpumask_set_cpu(i, cpus);
-       }
-
-       queue = 0;
-       for_each_possible_cpu(i) {
-               if (!cpumask_test_cpu(i, online_mask)) {
-                       map[i] = 0;
-                       continue;
-               }
+       unsigned int cpu, first_sibling;
 
+       for_each_possible_cpu(cpu) {
                /*
-                * Easy case - we have equal or more hardware queues. Or
-                * there are no thread siblings to take into account. Do
-                * 1:1 if enough, or sequential mapping if less.
+                * First do sequential mapping between CPUs and queues.
+                * If there are still CPUs left to map and there are several
+                * threads per core, then map sibling threads to the same queue
+                * as a performance optimization.
                 */
-               if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) {
-                       map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue);
-                       queue++;
-                       continue;
+               if (cpu < nr_queues) {
+                       map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+               } else {
+                       first_sibling = get_first_sibling(cpu);
+                       if (first_sibling == cpu)
+                               map[cpu] = cpu_to_queue_index(nr_queues, cpu, online_mask);
+                       else
+                               map[cpu] = map[first_sibling];
                }
-
-               /*
-                * Less then nr_cpus queues, and we have some number of
-                * threads per cores. Map sibling threads to the same
-                * queue.
-                */
-               first_sibling = get_first_sibling(i);
-               if (first_sibling == i) {
-                       map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues,
-                                                       queue);
-                       queue++;
-               } else
-                       map[i] = map[first_sibling];
        }
 
-       free_cpumask_var(cpus);
        return 0;
 }
 EXPORT_SYMBOL_GPL(blk_mq_map_queues);