sparc64: Improve boot time by per cpu map update
author Atish Patra <atish.patra@oracle.com>
Mon, 13 Feb 2017 02:32:58 +0000 (19:32 -0700)
committer Chuck Anderson <chuck.anderson@oracle.com>
Thu, 6 Apr 2017 07:13:55 +0000 (00:13 -0700)
Currently, smp_fill_in_sib_core_maps is invoked during cpu_up to set up
all the core/sibling maps correctly. Each invocation is O(n^2) because
it walks all the online cpus in a nested loop, and it is run again every
time another cpu comes online, so the total work done during smp_init()
grows roughly as O(n^3) with the number of cpus, leading to a higher
boot time.

Optimize the code path by comparing only the newly onlined cpu against
the cpus that are already online, and setting the maps for both cpus in
the same pass. Take this opportunity to also merge the three separate
for loops into one. Here is the smp_init() time before and after the
fix.

Number of cpus:   before fix:   after the fix:
512               2.30s         0.283s
1024              14.23s        0.493s
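
For illustration only, here is a standalone userspace sketch (not the
kernel code; NCPUS, core_id and the map arrays are made up for the
example) contrasting the old full O(n^2) rebuild with the incremental,
symmetric per-cpu update this patch switches to, and checking that both
end up with the same map:

/* Illustrative sketch, assumes nothing about the real cpu topology. */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define NCPUS 8

static int core_id[NCPUS]          = { 0, 0, 1, 1, 2, 2, 3, 3 };
static bool old_map[NCPUS][NCPUS]; /* rebuilt from scratch each online */
static bool new_map[NCPUS][NCPUS]; /* updated incrementally            */

/* Old scheme: whenever a cpu comes online, rebuild every row by
 * comparing all online cpus pairwise -- O(online^2) per bring-up. */
static void old_fill_all(int online)
{
	for (int i = 0; i < online; i++)
		for (int j = 0; j < online; j++)
			old_map[i][j] = (core_id[i] == core_id[j]);
}

/* New scheme: touch only the rows involving the cpu that just came
 * online, setting both directions at once -- O(online) per bring-up. */
static void new_fill_one(int cpu)
{
	for (int i = 0; i <= cpu; i++)
		if (core_id[cpu] == core_id[i])
			new_map[cpu][i] = new_map[i][cpu] = true;
}

int main(void)
{
	for (int cpu = 0; cpu < NCPUS; cpu++) {	/* simulate cpu_up() */
		old_fill_all(cpu + 1);
		new_fill_one(cpu);
	}
	for (int i = 0; i < NCPUS; i++)
		for (int j = 0; j < NCPUS; j++)
			assert(old_map[i][j] == new_map[i][j]);
	puts("incremental update matches full rebuild");
	return 0;
}

The symmetric set is what lets a single pass suffice: when the new cpu
finds a sibling that is already online, it updates both its own row and
the sibling's row, so rows built earlier never need to be revisited.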

Orabug: 25496463

Signed-off-by: Atish Patra <atish.patra@oracle.com>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
arch/sparc/kernel/smp_64.c

index 9bab2982d17e10e47c4d41aac6d607e54b34133e..8b18c56717ba43a61a82adccbef9056294a3903f 100644
@@ -1262,52 +1262,64 @@ void __init smp_fill_in_cpu_possible_map(void)
                set_cpu_possible(i, false);
 }
 
-void smp_fill_in_sib_core_maps(void)
+void smp_setup_sib_core_map(unsigned int cpu)
 {
-       unsigned int i, j;
+       unsigned int i;
 
-       for_each_online_cpu(i) {
-               cpumask_clear(&cpu_core_map[i]);
-               if (cpu_data(i).core_id == 0) {
-                       cpumask_set_cpu(i, &cpu_core_map[i]);
-                       continue;
-               }
-
-               for_each_online_cpu(j) {
-                       if (cpu_data(i).core_id == cpu_data(j).core_id)
-                               cpumask_set_cpu(j, &cpu_core_map[i]);
+       cpumask_clear(&cpu_core_map[cpu]);
+       if (cpu_data(cpu).core_id == 0) {
+               cpumask_set_cpu(cpu, &cpu_core_map[cpu]);
+       } else {
+               for_each_online_cpu(i) {
+                       if (cpu_data(cpu).core_id == cpu_data(i).core_id) {
+                               cpumask_set_cpu(i, &cpu_core_map[cpu]);
+                               cpumask_set_cpu(cpu, &cpu_core_map[i]);
+                       }
                }
        }
+       cpumask_clear(&cpu_core_sib_map[cpu]);
+       if (cpu_data(cpu).sock_id == -1) {
+               cpumask_set_cpu(cpu, &cpu_core_sib_map[cpu]);
+       } else {
 
-       for_each_online_cpu(i)  {
-               cpumask_clear(&cpu_core_sib_map[i]);
-               if (cpu_data(i).sock_id == -1) {
-                       cpumask_set_cpu(i, &cpu_core_sib_map[i]);
-                       continue;
-               }
+               for_each_online_cpu(i)  {
+                       if (cpu_data(cpu).max_cache_id ==
+                           cpu_data(i).max_cache_id) {
+                               cpumask_set_cpu(i,
+                                       &cpu_core_sib_cache_map[cpu]);
 
-               for_each_online_cpu(j)  {
-                       if (cpu_data(i).max_cache_id ==
-                           cpu_data(j).max_cache_id)
-                               cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+                               cpumask_set_cpu(cpu,
+                                       &cpu_core_sib_cache_map[i]);
+                       }
 
-                       if (cpu_data(i).sock_id == cpu_data(j).sock_id)
-                               cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+                       if (cpu_data(cpu).sock_id == cpu_data(i).sock_id) {
+                               cpumask_set_cpu(i, &cpu_core_sib_map[cpu]);
+                               cpumask_set_cpu(cpu, &cpu_core_sib_map[i]);
+                       }
                }
        }
-
-       for_each_online_cpu(i) {
-               cpumask_clear(&per_cpu(cpu_sibling_map, i));
-               if (cpu_data(i).proc_id == -1) {
-                       cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
-                       continue;
+       cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
+       if (cpu_data(cpu).proc_id == -1) {
+               cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu));
+       } else {
+               for_each_online_cpu(i) {
+                       if (cpu_data(cpu).proc_id == cpu_data(i).proc_id) {
+                               cpumask_set_cpu(i, &per_cpu(cpu_sibling_map,
+                                       cpu));
+                               cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map,
+                                       i));
+                       }
                }
+       }
 
-               for_each_online_cpu(j) {
-                       if (cpu_data(i).proc_id == cpu_data(j).proc_id)
-                               cpumask_set_cpu(j, &per_cpu(cpu_sibling_map,
-                                               i));
-               }
+}
+
+void smp_fill_in_sib_core_maps(void)
+{
+       unsigned int i;
+
+       for_each_online_cpu(i) {
+               smp_setup_sib_core_map(i);
        }
 }
 
@@ -1327,7 +1339,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
                         */
                        if (tlb_type != hypervisor)
                                smp_synchronize_one_tick(cpu);
-                       smp_fill_in_sib_core_maps();
+                       smp_setup_sib_core_map(cpu);
                        cpu_map_rebuild();
                        sparc64_update_numa_mask(cpu);
                }