From 5f67cbb420a412dac3e06ef48584b20de67032b1 Mon Sep 17 00:00:00 2001
From: Atish Patra <atish.patra@oracle.com>
Date: Sun, 12 Feb 2017 19:32:58 -0700
Subject: [PATCH] sparc64: Improve boot time by per cpu map update

Currently, smp_fill_in_sib_core_maps is invoked during cpu_up to setup
all the core/sibling map correctly. This happens in the order of O(n^2)
as it iterates over all the online cpus twice when each cpu comes
online. This increases smp_init() execution time exponentially leading
to a higher boot time.

Optimize the code path by comparing only the current cpu with online
cpus and set the maps for both the cpus simultaneously. Take this
opportunity to merge all three for loops into one as well.

Here is the smp_init() time after and before the fix.

Number of cpus:	before fix:	after the fix:
512		2.30s		.283s
1024		14.23s		.493s

Orabug: 25496463

Signed-off-by: Atish Patra <atish.patra@oracle.com>
Reviewed-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Reviewed-by: Allen Pais <allen.pais@oracle.com>
Signed-off-by: Allen Pais <allen.pais@oracle.com>
---
 arch/sparc/kernel/smp_64.c | 84 ++++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 36 deletions(-)

diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 9bab2982d17e..8b18c56717ba 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1262,50 +1262,62 @@ void __init smp_fill_in_cpu_possible_map(void)
 		set_cpu_possible(i, false);
 }
 
-void smp_fill_in_sib_core_maps(void)
+void smp_setup_sib_core_map(unsigned int cpu)
 {
-	unsigned int i, j;
+	unsigned int i;
 
-	for_each_online_cpu(i) {
-		cpumask_clear(&cpu_core_map[i]);
-		if (cpu_data(i).core_id == 0) {
-			cpumask_set_cpu(i, &cpu_core_map[i]);
-			continue;
-		}
-
-		for_each_online_cpu(j) {
-			if (cpu_data(i).core_id == cpu_data(j).core_id)
-				cpumask_set_cpu(j, &cpu_core_map[i]);
+	cpumask_clear(&cpu_core_map[cpu]);
+	if (cpu_data(cpu).core_id == 0) {
+		cpumask_set_cpu(cpu, &cpu_core_map[cpu]);
+	} else {
+		for_each_online_cpu(i) {
+			if (cpu_data(cpu).core_id == cpu_data(i).core_id) {
+				cpumask_set_cpu(i, &cpu_core_map[cpu]);
+				cpumask_set_cpu(cpu, &cpu_core_map[i]);
+			}
 		}
 	}
 
+	cpumask_clear(&cpu_core_sib_map[cpu]);
+	if (cpu_data(cpu).sock_id == -1) {
+		cpumask_set_cpu(cpu, &cpu_core_sib_map[cpu]);
+	} else {
-	for_each_online_cpu(i) {
-		cpumask_clear(&cpu_core_sib_map[i]);
-		if (cpu_data(i).sock_id == -1) {
-			cpumask_set_cpu(i, &cpu_core_sib_map[i]);
-			continue;
-		}
+		for_each_online_cpu(i) {
+			if (cpu_data(cpu).max_cache_id ==
+			    cpu_data(i).max_cache_id) {
+				cpumask_set_cpu(i,
+						&cpu_core_sib_cache_map[cpu]);
-		for_each_online_cpu(j) {
-			if (cpu_data(i).max_cache_id ==
-			    cpu_data(j).max_cache_id)
-				cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);
+				cpumask_set_cpu(cpu,
+						&cpu_core_sib_cache_map[i]);
+			}
 
-			if (cpu_data(i).sock_id == cpu_data(j).sock_id)
-				cpumask_set_cpu(j, &cpu_core_sib_map[i]);
+			if (cpu_data(cpu).sock_id == cpu_data(i).sock_id) {
+				cpumask_set_cpu(i, &cpu_core_sib_map[cpu]);
+				cpumask_set_cpu(cpu, &cpu_core_sib_map[i]);
+			}
 		}
 	}
-
-	for_each_online_cpu(i) {
-		cpumask_clear(&per_cpu(cpu_sibling_map, i));
-		if (cpu_data(i).proc_id == -1) {
-			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
-			continue;
+	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));
+	if (cpu_data(cpu).proc_id == -1) {
+		cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu));
+	} else {
+		for_each_online_cpu(i) {
+			if (cpu_data(cpu).proc_id == cpu_data(i).proc_id) {
+				cpumask_set_cpu(i, &per_cpu(cpu_sibling_map,
+					cpu));
+				cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map,
+					i));
+			}
 		}
+	}
 
-		for_each_online_cpu(j) {
-			if (cpu_data(i).proc_id == cpu_data(j).proc_id)
-				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map,
-					i));
-		}
+}
+
+void smp_fill_in_sib_core_maps(void)
+{
+	unsigned int i;
+
+	for_each_online_cpu(i) {
+		smp_setup_sib_core_map(i);
 	}
 }
@@ -1327,7 +1339,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 			 */
 			if (tlb_type != hypervisor)
 				smp_synchronize_one_tick(cpu);
-			smp_fill_in_sib_core_maps();
+			smp_setup_sib_core_map(cpu);
 			cpu_map_rebuild();
 			sparc64_update_numa_mask(cpu);
 		}
-- 
2.50.1