cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
+/*
+ * A logcal cpu mask containing only one VPE per core to
+ * reduce the number of IPIs on large MT systems.
+ */
+cpumask_t cpu_foreign_map __read_mostly;
+EXPORT_SYMBOL(cpu_foreign_map);
+
 /* representing cpus for which sibling maps can be computed */
 static cpumask_t cpu_sibling_setup_map;
 
        }
 }
 
+/*
+ * Calculate a new cpu_foreign_map mask whenever a
+ * new cpu appears or disappears.
+ */
+static inline void calculate_cpu_foreign_map(void)
+{
+       int i, k, core_present;
+       cpumask_t temp_foreign_map;
+
+       /* Re-calculate the mask */
+       for_each_online_cpu(i) {
+               core_present = 0;
+               for_each_cpu(k, &temp_foreign_map)
+                       if (cpu_data[i].package == cpu_data[k].package &&
+                           cpu_data[i].core == cpu_data[k].core)
+                               core_present = 1;
+               if (!core_present)
+                       cpumask_set_cpu(i, &temp_foreign_map);
+       }
+
+       cpumask_copy(&cpu_foreign_map, &temp_foreign_map);
+}
+
 struct plat_smp_ops *mp_ops;
 EXPORT_SYMBOL(mp_ops);
 
        set_cpu_sibling_map(cpu);
        set_cpu_core_map(cpu);
 
+       calculate_cpu_foreign_map();
+
        cpumask_set_cpu(cpu, &cpu_callin_map);
 
        synchronise_count_slave(cpu);
 static void stop_this_cpu(void *dummy)
 {
        /*
-        * Remove this CPU:
+        * Remove this CPU. Be a bit slow here and
+        * set the bits for every online CPU so we don't miss
+        * any IPI whilst taking this VPE down.
         */
+
+       cpumask_copy(&cpu_foreign_map, cpu_online_mask);
+
+       /* Make it visible to every other CPU */
+       smp_mb();
+
        set_cpu_online(smp_processor_id(), false);
+       calculate_cpu_foreign_map();
        local_irq_disable();
        while (1);
 }
        mp_ops->prepare_cpus(max_cpus);
        set_cpu_sibling_map(0);
        set_cpu_core_map(0);
+       calculate_cpu_foreign_map();
 #ifndef CONFIG_HOTPLUG_CPU
        init_cpu_present(cpu_possible_mask);
 #endif
 
 #include <asm/cacheflush.h> /* for run_uncached() */
 #include <asm/traps.h>
 #include <asm/dma-coherence.h>
+#include <asm/mips-cm.h>
 
 /*
  * Special Variant of smp_call_function for use by cache functions:
 {
        preempt_disable();
 
-#ifndef CONFIG_MIPS_MT_SMP
-       smp_call_function(func, info, 1);
-#endif
+       /*
+        * The Coherent Manager propagates address-based cache ops to other
+        * cores but not index-based ops. However, r4k_on_each_cpu is used
+        * in both cases so there is no easy way to tell what kind of op is
+        * executed to the other cores. The best we can probably do is
+        * to restrict that call when a CM is not present because both
+        * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops.
+        */
+       if (!mips_cm_present())
+               smp_call_function_many(&cpu_foreign_map, func, info, 1);
        func(info);
        preempt_enable();
 }