#include <asm/pmu.h>
 #include <asm/stacktrace.h>
 
-/*
- * Hardware lock to serialize accesses to PMU registers. Needed for the
- * read/modify/write sequences.
- */
-static DEFINE_RAW_SPINLOCK(pmu_lock);
-
 /*
  * ARMv6 supports a maximum of 3 events, starting from index 0. If we add
  * another platform that supports more, we need to increase this to be the
         * an event. A 0 means that the counter can be used.
         */
        unsigned long           used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)];
+
+       /*
+        * Hardware lock to serialize accesses to PMU registers. Needed for the
+        * read/modify/write sequences.
+        */
+       raw_spinlock_t          pmu_lock;
 };
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
 
 
 static void __init cpu_pmu_init(struct arm_pmu *armpmu)
 {
+       int cpu;
+       for_each_possible_cpu(cpu) {
+               struct cpu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
+               raw_spin_lock_init(&events->pmu_lock); /* one lock per CPU */
+       }
        armpmu->get_hw_events = armpmu_get_cpu_events;
 }
 
 
                      int idx)
 {
        unsigned long val, mask, evt, flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        if (ARMV6_CYCLE_COUNTER == idx) {
                mask    = 0;
         * Mask out the current event and set the counter to count the event
         * that we're interested in.
         */
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = armv6_pmcr_read();
        val &= ~mask;
        val |= evt;
        armv6_pmcr_write(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int counter_is_active(unsigned long pmcr, int idx)
 armv6pmu_start(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = armv6_pmcr_read();
        val |= ARMV6_PMCR_ENABLE;
        armv6_pmcr_write(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 armv6pmu_stop(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags); /* guard PMCR RMW */
        val = armv6_pmcr_read();
        val &= ~ARMV6_PMCR_ENABLE;
        armv6_pmcr_write(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
                       int idx)
 {
        unsigned long val, mask, evt, flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        if (ARMV6_CYCLE_COUNTER == idx) {
                mask    = ARMV6_PMCR_CCOUNT_IEN;
         * of ETM bus signal assertion cycles. The external reporting should
         * be disabled and so this should never increment.
         */
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = armv6_pmcr_read();
        val &= ~mask;
        val |= evt;
        armv6_pmcr_write(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
                              int idx)
 {
        unsigned long val, mask, flags, evt = 0;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        if (ARMV6_CYCLE_COUNTER == idx) {
                mask    = ARMV6_PMCR_CCOUNT_IEN;
         * Unlike UP ARMv6, we don't have a way of stopping the counters. We
         * simply disable the interrupt reporting.
         */
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = armv6_pmcr_read();
        val &= ~mask;
        val |= evt;
        armv6_pmcr_write(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static struct arm_pmu armv6pmu = {
 
 static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        /*
         * Enable counter and interrupt, and set the counter to count
         * the event that we're interested in.
         */
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
        /*
         * Disable counter
         */
        armv7_pmnc_enable_counter(idx);
 
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        /*
         * Disable counter and interrupt
         */
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
 
        /*
         * Disable counter
         */
        armv7_pmnc_disable_intens(idx);
 
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
 static void armv7pmu_start(void)
 {
        unsigned long flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags); /* guard PMNC RMW */
        /* Enable all counters */
        armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void armv7pmu_stop(void)
 {
        unsigned long flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags); /* guard PMNC RMW */
        /* Disable all counters */
        armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
 
 xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long val, mask, evt, flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                return;
        }
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val &= ~mask;
        val |= evt;
        xscale1pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long val, mask, evt, flags;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        switch (idx) {
        case XSCALE_CYCLE_COUNTER:
                return;
        }
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val &= ~mask;
        val |= evt;
        xscale1pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
 xscale1pmu_start(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = xscale1pmu_read_pmnc();
        val |= XSCALE_PMU_ENABLE;
        xscale1pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale1pmu_stop(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags); /* guard PMNC RMW */
        val = xscale1pmu_read_pmnc();
        val &= ~XSCALE_PMU_ENABLE;
        xscale1pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32
 xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long flags, ien, evtsel;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        ien = xscale2pmu_read_int_enable();
        evtsel = xscale2pmu_read_event_select();
                return;
        }
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        xscale2pmu_write_event_select(evtsel);
        xscale2pmu_write_int_enable(ien);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
        unsigned long flags, ien, evtsel;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
        ien = xscale2pmu_read_int_enable();
        evtsel = xscale2pmu_read_event_select();
                return;
        }
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        xscale2pmu_write_event_select(evtsel);
        xscale2pmu_write_int_enable(ien);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static int
 xscale2pmu_start(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags);
        val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
        val |= XSCALE_PMU_ENABLE;
        xscale2pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static void
 xscale2pmu_stop(void)
 {
        unsigned long flags, val;
+       struct cpu_hw_events *events = armpmu->get_hw_events();
 
-       raw_spin_lock_irqsave(&pmu_lock, flags);
+       raw_spin_lock_irqsave(&events->pmu_lock, flags); /* guard PMNC RMW */
        val = xscale2pmu_read_pmnc();
        val &= ~XSCALE_PMU_ENABLE;
        xscale2pmu_write_pmnc(val);
-       raw_spin_unlock_irqrestore(&pmu_lock, flags);
+       raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 }
 
 static inline u32