--- a/arch/arm/kernel/smp_tlb.c
+++ b/arch/arm/kernel/smp_tlb.c
 #include <asm/smp_plat.h>
 #include <asm/tlbflush.h>
 
-static void on_each_cpu_mask(void (*func)(void *), void *info, int wait,
-       const struct cpumask *mask)
-{
-       preempt_disable();
-
-       smp_call_function_many(mask, func, info, wait);
-       if (cpumask_test_cpu(smp_processor_id(), mask))
-               func(info);
-
-       preempt_enable();
-}
-
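
A behavioral note on this removal: the ARM-local helper ran func(info) with interrupts enabled, whereas the generic replacement added to kernel/smp.c at the end of this patch wraps the local call in local_irq_disable()/local_irq_enable(), so the function sees the same IRQ-off context locally that it gets on remote CPUs when delivered by IPI. Condensed from the hunks in this patch, the local-invocation step changes roughly like this:

	/* Removed ARM-local variant: the local call ran with IRQs on */
	if (cpumask_test_cpu(smp_processor_id(), mask))
		func(info);

	/* Generic replacement: the local call mirrors remote IPI context */
	if (cpumask_test_cpu(cpu, mask)) {
		local_irq_disable();
		func(info);
		local_irq_enable();
	}
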
 /**********************************************************************/
 
 /*
 void flush_tlb_mm(struct mm_struct *mm)
 {
        if (tlb_ops_need_broadcast())
-               on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm));
+               on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1);
        else
                local_flush_tlb_mm(mm);
 }
                struct tlb_args ta;
                ta.ta_vma = vma;
                ta.ta_start = uaddr;
-               on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm));
+               on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page,
+                                       &ta, 1);
        } else
                local_flush_tlb_page(vma, uaddr);
 }
                ta.ta_vma = vma;
                ta.ta_start = start;
                ta.ta_end = end;
-               on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm));
+               on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range,
+                                       &ta, 1);
        } else
                local_flush_tlb_range(vma, start, end);
 }
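
The three call-site hunks above only reorder arguments: the generic helper takes the cpumask first, matching smp_call_function_many(). A minimal sketch of a caller under the new signature; the names drain_local_state and drain_mm_cpus are hypothetical and not part of this patch:

	#include <linux/smp.h>

	/* Hypothetical IPI handler: runs on each targeted CPU, potentially
	 * in interrupt context, so it must be fast and non-blocking. */
	static void drain_local_state(void *info)
	{
		/* ... per-cpu work ... */
	}

	/* Run the handler on every CPU that has run @mm, waiting until the
	 * remote calls (and the local one, if applicable) have completed. */
	static void drain_mm_cpus(struct mm_struct *mm)
	{
		on_each_cpu_mask(mm_cpumask(mm), drain_local_state, NULL, true);
	}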

--- a/arch/tile/include/asm/smp.h
+++ b/arch/tile/include/asm/smp.h
 /* Boot a secondary cpu */
 void online_secondary(void);
 
-/* Call a function on a specified set of CPUs (may include this one). */
-extern void on_each_cpu_mask(const struct cpumask *mask,
-                            void (*func)(void *), void *info, bool wait);
-
 /* Topology of the supervisor tile grid, and coordinates of boot processor */
 extern HV_Topology smp_topology;
 
 
 #else /* !CONFIG_SMP */
 
-#define on_each_cpu_mask(mask, func, info, wait)               \
-  do { if (cpumask_test_cpu(0, (mask))) func(info); } while (0)
-
 #define smp_master_cpu         0
 #define smp_height             1
 #define smp_width              1

--- a/arch/tile/kernel/smp.c
+++ b/arch/tile/kernel/smp.c
        send_IPI_many(&mask, tag);
 }
 
-
-/*
- * Provide smp_call_function_mask, but also run function locally
- * if specified in the mask.
- */
-void on_each_cpu_mask(const struct cpumask *mask, void (*func)(void *),
-                     void *info, bool wait)
-{
-       int cpu = get_cpu();
-       smp_call_function_many(mask, func, info, wait);
-       if (cpumask_test_cpu(cpu, mask)) {
-               local_irq_disable();
-               func(info);
-               local_irq_enable();
-       }
-       put_cpu();
-}
-
-
 /*
  * Functions related to starting/stopping cpus.
  */

--- a/include/linux/smp.h
+++ b/include/linux/smp.h
  */
 int on_each_cpu(smp_call_func_t func, void *info, int wait);
 
+/*
+ * Call a function on processors specified by mask, which might include
+ * the local one.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+               void *info, bool wait);
+
 /*
  * Mark the boot cpu "online" so that it can call console drivers in
  * printk() and can access its per-cpu storage.
                local_irq_enable();             \
                0;                              \
        })
+/*
+ * Note we still need to test the mask even for UP,
+ * because we can be handed an empty mask: code that
+ * on SMP would call us without the local CPU in the
+ * mask leaves no CPUs set at all on a one-CPU system.
+ */
+#define on_each_cpu_mask(mask, func, info, wait) \
+       do {                                            \
+               if (cpumask_test_cpu(0, (mask))) {      \
+                       local_irq_disable();            \
+                       (func)(info);                   \
+                       local_irq_enable();             \
+               }                                       \
+       } while (0)
+
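
To make the comment above concrete: a caller may compute a mask that excludes the local CPU, which on a one-CPU system leaves the mask empty, and the function must then not run at all. A hypothetical sketch (flush_other_cpus is illustrative, not part of this patch):

	/* Ask every *other* CPU that has run @mm to run @func. On UP,
	 * clearing the local CPU empties the mask, and the stub's
	 * cpumask_test_cpu(0, mask) check correctly skips @func. */
	static void flush_other_cpus(struct mm_struct *mm, smp_call_func_t func)
	{
		cpumask_var_t others;
		int cpu;

		if (!alloc_cpumask_var(&others, GFP_KERNEL))
			return;
		cpu = get_cpu();
		cpumask_copy(others, mm_cpumask(mm));
		cpumask_clear_cpu(cpu, others);
		on_each_cpu_mask(others, func, mm, true);
		put_cpu();
		free_cpumask_var(others);
	}
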
 static inline void smp_send_reschedule(int cpu) { }
 #define num_booting_cpus()                     1
 #define smp_prepare_boot_cpu()                 do {} while (0)

--- a/kernel/smp.c
+++ b/kernel/smp.c
        return ret;
 }
 EXPORT_SYMBOL(on_each_cpu);
+
+/**
+ * on_each_cpu_mask(): Run a function on processors specified by
+ * cpumask, which may include the local processor.
+ * @mask: The set of cpus to run on (only runs on online subset).
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed
+ *        on other CPUs.
+ *
+ * If @wait is true, then returns once @func has returned.
+ *
+ * You must not call this function with disabled interrupts or
+ * from a hardware interrupt handler or from a bottom half handler.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+                       void *info, bool wait)
+{
+       int cpu = get_cpu();
+
+       smp_call_function_many(mask, func, info, wait);
+       if (cpumask_test_cpu(cpu, mask)) {
+               local_irq_disable();
+               func(info);
+               local_irq_enable();
+       }
+       put_cpu();
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
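
For completeness, a hypothetical user of the exported helper. Because the local invocation runs between local_irq_disable()/local_irq_enable(), @func must follow the same rules locally as in remote IPI context: no sleeping and no assumption that interrupts are enabled. The names demo_count, reset_count and reset_counts_on are illustrative only:

	#include <linux/percpu.h>
	#include <linux/smp.h>

	static DEFINE_PER_CPU(unsigned long, demo_count);

	/* Runs with IRQs off on every targeted CPU, including the local one. */
	static void reset_count(void *unused)
	{
		this_cpu_write(demo_count, 0);
	}

	/* Must be called from process context with interrupts enabled. */
	static void reset_counts_on(const struct cpumask *mask)
	{
		on_each_cpu_mask(mask, reset_count, NULL, true);
	}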