&cpu_pmu->node);
 }
 
-static struct arm_pmu *__armpmu_alloc(gfp_t flags)
+struct arm_pmu *armpmu_alloc(void)
 {
        struct arm_pmu *pmu;
        int cpu;
 
-       pmu = kzalloc(sizeof(*pmu), flags);
+       pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
        if (!pmu)
                goto out;
 
-       pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, flags);
+       pmu->hw_events = alloc_percpu_gfp(struct pmu_hw_events, GFP_KERNEL);
        if (!pmu->hw_events) {
                pr_info("failed to allocate per-cpu PMU data.\n");
                goto out_free_pmu;
        return NULL;
 }
 
-struct arm_pmu *armpmu_alloc(void)
-{
-       return __armpmu_alloc(GFP_KERNEL);
-}
-
-struct arm_pmu *armpmu_alloc_atomic(void)
-{
-       return __armpmu_alloc(GFP_ATOMIC);
-}
-
-
 void armpmu_free(struct arm_pmu *pmu)
 {
        free_percpu(pmu->hw_events);
 
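With the GFP_ATOMIC variant gone, armpmu_alloc() may sleep and must only be called from process context. A minimal sketch of how a caller is expected to use the surviving API (example_pmu_probe() and its error handling are hypothetical, not part of this patch):

	#include <linux/perf/arm_pmu.h>

	/* Hypothetical caller: allocate, initialise and register a PMU from
	 * process context, freeing it again if any step fails. */
	static int example_pmu_probe(armpmu_init_fn init_fn)
	{
		struct arm_pmu *pmu;
		int ret;

		pmu = armpmu_alloc();		/* GFP_KERNEL only; may sleep */
		if (!pmu)
			return -ENOMEM;

		ret = init_fn(pmu);		/* driver-specific setup */
		if (!ret)
			ret = armpmu_register(pmu);
		if (ret)
			armpmu_free(pmu);

		return ret;
	}
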
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 
+#include <asm/cpu.h>
 #include <asm/cputype.h>
 
 static DEFINE_PER_CPU(struct arm_pmu *, probed_pmus);
        return NULL;
 }
 
-static struct arm_pmu *arm_pmu_acpi_find_alloc_pmu(void)
-{
-       struct arm_pmu *pmu;
-
-       pmu = arm_pmu_acpi_find_pmu();
-       if (pmu)
-               return pmu;
-
-       pmu = armpmu_alloc_atomic();
-       if (!pmu) {
-               pr_warn("Unable to allocate PMU for CPU%d\n",
-                       smp_processor_id());
-               return NULL;
-       }
-
-       pmu->acpi_cpuid = read_cpuid_id();
-
-       return pmu;
-}
-
 /*
  * Check whether the new IRQ is compatible with those already associated with
  * the PMU (e.g. we don't have mismatched PPIs).
        if (per_cpu(probed_pmus, cpu))
                return 0;
 
-       pmu = arm_pmu_acpi_find_alloc_pmu();
-       if (!pmu)
-               return -ENOMEM;
+       pmu = arm_pmu_acpi_find_pmu();
+       if (!pmu) {
+               pr_warn_ratelimited("Unable to associate CPU%d with a PMU\n",
+                                   cpu);
+               return 0;
+       }
 
        arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
-
-       /*
-        * Ideally, we'd probe the PMU here when we find the first matching
-        * CPU. We can't do that for several reasons; see the comment in
-        * arm_pmu_acpi_init().
-        *
-        * So for the time being, we're done.
-        */
        return 0;
 }
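The association helper called above is not shown in this excerpt. The sketch below is what arm_pmu_acpi_associate_pmu_cpu() is assumed to do, pieced together from how it is used here; pmu_irqs and pmu_irq_matches() (the compatibility check described in the comment above) are assumed names from the surrounding file:

	/*
	 * Assumed shape of the helper: remember which PMU this CPU belongs to,
	 * mark the CPU as supported, and wire up its parsed per-CPU IRQ when
	 * it is compatible with the IRQs already associated with the PMU.
	 */
	static void arm_pmu_acpi_associate_pmu_cpu(struct arm_pmu *pmu,
						   unsigned int cpu)
	{
		int irq = per_cpu(pmu_irqs, cpu);

		per_cpu(probed_pmus, cpu) = pmu;

		if (pmu_irq_matches(pmu, irq)) {
			struct pmu_hw_events __percpu *hw_events = pmu->hw_events;

			per_cpu(hw_events->irq, cpu) = irq;
		}

		cpumask_set_cpu(cpu, &pmu->supported_cpus);
	}
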
 
+static void arm_pmu_acpi_probe_matching_cpus(struct arm_pmu *pmu,
+                                            unsigned long cpuid)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               unsigned long cpu_cpuid = per_cpu(cpu_data, cpu).reg_midr;
+
+               if (cpu_cpuid == cpuid)
+                       arm_pmu_acpi_associate_pmu_cpu(pmu, cpu);
+       }
+}
+
 int arm_pmu_acpi_probe(armpmu_init_fn init_fn)
 {
        int pmu_idx = 0;
-       int cpu, ret;
+       unsigned int cpu;
+       int ret;
+
+       ret = arm_pmu_acpi_parse_irqs();
+       if (ret)
+               return ret;
+
+       ret = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_ARM_ACPI_STARTING,
+                                       "perf/arm/pmu_acpi:starting",
+                                       arm_pmu_acpi_cpu_starting, NULL);
+       if (ret)
+               return ret;
 
        /*
         * Initialise and register the set of PMUs which we know about right
         * For the moment, as with the platform/DT case, we need at least one
         * of a PMU's CPUs to be online at probe time.
         */
-       for_each_possible_cpu(cpu) {
+       for_each_online_cpu(cpu) {
                struct arm_pmu *pmu = per_cpu(probed_pmus, cpu);
+               unsigned long cpuid;
                char *base_name;
 
-               if (!pmu || pmu->name)
+               /* If we've already probed this CPU, we have nothing to do */
+               if (pmu)
                        continue;
 
+               pmu = armpmu_alloc();
+               if (!pmu) {
+                       pr_warn("Unable to allocate PMU for CPU%d\n",
+                               cpu);
+                       return -ENOMEM;
+               }
+
+               cpuid = per_cpu(cpu_data, cpu).reg_midr;
+               pmu->acpi_cpuid = cpuid;
+
+               arm_pmu_acpi_probe_matching_cpus(pmu, cpuid);
+
                ret = init_fn(pmu);
                if (ret == -ENODEV) {
                        /* PMU not handled by this driver, or not present */
                }
        }
 
-       return 0;
+       return ret;
 }
 
 static int arm_pmu_acpi_init(void)
 {
-       int ret;
-
        if (acpi_disabled)
                return 0;
 
        arm_spe_acpi_register_device();
 
-       ret = arm_pmu_acpi_parse_irqs();
-       if (ret)
-               return ret;
-
-       ret = cpuhp_setup_state(CPUHP_AP_PERF_ARM_ACPI_STARTING,
-                               "perf/arm/pmu_acpi:starting",
-                               arm_pmu_acpi_cpu_starting, NULL);
-
-       return ret;
+       return 0;
 }
 subsys_initcall(arm_pmu_acpi_init)
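
For context, arm_pmu_acpi_probe() is the entry point a CPU PMU driver calls at init time when booting via ACPI. Roughly how the arm64 PMUv3 driver is expected to hook in (names follow that driver from memory and should be read as a sketch, not as part of this patch):

	/* armv8_pmu_driver and armv8_pmuv3_pmu_init come from the arm64
	 * PMUv3 driver; only the ACPI branch uses arm_pmu_acpi_probe(). */
	static int __init armv8_pmu_driver_init(void)
	{
		if (acpi_disabled)
			return platform_driver_register(&armv8_pmu_driver);
		else
			return arm_pmu_acpi_probe(armv8_pmuv3_pmu_init);
	}
	device_initcall(armv8_pmu_driver_init)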