return per_cpu_ptr(bp_cpuinfo + type, cpu);
 }
 
+/*
+ * Number of pinned CPU breakpoints globally: for each type, a histogram of
+ * the per-CPU bp_cpuinfo::cpu_pinned counts, used to find their maximum
+ * across all CPUs without iterating every CPU.
+ */
+static struct bp_slots_histogram cpu_pinned[TYPE_MAX];
+
 /* Keep track of the breakpoints attached to tasks */
 static struct rhltable task_bps_ht;
 static const struct rhashtable_params task_bps_ht_params = {
                                goto err;
                }
        }
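+       /* Also allocate the global histograms of CPU-pinned breakpoints. */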
+       for (i = 0; i < TYPE_MAX; i++) {
+               if (!bp_slots_histogram_alloc(&cpu_pinned[i], i))
+                       goto err;
+       }
 
        return 0;
 err:
                if (err_cpu == cpu)
                        break;
        }
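+       /* Also free the global histograms. */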
+       for (i = 0; i < TYPE_MAX; i++)
+               bp_slots_histogram_free(&cpu_pinned[i]);
 
        return -ENOMEM;
 }
 /*
  * Count the number of breakpoints of the same type and same task.
  * The given event must be not on the list.
+ *
+ * If @cpu is -1 but the result is not CPU-independent (i.e. some of the
+ * task's breakpoints are bound to a specific CPU), a negative value is
+ * returned.
  */
 static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
 {
                goto out;
 
        rhl_for_each_entry_rcu(iter, pos, head, hw.bp_list) {
-               if (find_slot_idx(iter->attr.bp_type) == type &&
-                   (iter->cpu < 0 || cpu == iter->cpu))
-                       count += hw_breakpoint_weight(iter);
+               if (find_slot_idx(iter->attr.bp_type) != type)
+                       continue;
+
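+               /*
+                * A breakpoint bound to a specific CPU makes the count
+                * CPU-dependent: if a CPU-independent count was requested
+                * (@cpu == -1), give up and return a negative value;
+                * otherwise skip breakpoints bound to a different CPU.
+                */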
+               if (iter->cpu >= 0) {
+                       if (cpu == -1) {
+                               count = -1;
+                               goto out;
+                       } else if (cpu != iter->cpu) {
+                               continue;
+                       }
+               }
+
+               count += hw_breakpoint_weight(iter);
        }
 
 out:
        int pinned_slots = 0;
        int cpu;
 
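+       /*
+        * For a task-bound breakpoint that is not itself bound to a CPU, try
+        * to avoid iterating all CPUs: if the task's pinned count is the same
+        * on every CPU, the maximum is that count plus the worst case of
+        * CPU-pinned breakpoints recorded in the global cpu_pinned histogram.
+        */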
+       if (bp->hw.target && bp->cpu < 0) {
+               int max_pinned = task_bp_pinned(-1, bp, type);
+
+               if (max_pinned >= 0) {
+                       /*
+                        * Fast path: the result of task_bp_pinned() is
+                        * CPU-independent and the same for any CPU.
+                        */
+                       max_pinned += bp_slots_histogram_max(&cpu_pinned[type], type);
+                       return max_pinned;
+               }
+       }
+
        for_each_cpu(cpu, cpumask) {
                struct bp_cpuinfo *info = get_bp_info(cpu, type);
                int nr;
 
        /* Pinned counter cpu profiling */
        if (!bp->hw.target) {
+               struct bp_cpuinfo *info = get_bp_info(bp->cpu, type);
+
                lockdep_assert_held_write(&bp_cpuinfo_sem);
-               get_bp_info(bp->cpu, type)->cpu_pinned += weight;
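+               /*
+                * Update the global cpu_pinned histogram with this CPU's
+                * change in cpu_pinned before updating the per-CPU count
+                * itself.
+                */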
+               bp_slots_histogram_add(&cpu_pinned[type], info->cpu_pinned, weight);
+               info->cpu_pinned += weight;
                return 0;
        }
 
                }
        }
 
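+       /* Check the global histogram of CPU-pinned breakpoints as well. */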
+       for (int type = 0; type < TYPE_MAX; ++type) {
+               for (int slot = 0; slot < hw_breakpoint_slots_cached(type); ++slot) {
+                       /*
+                        * Warn, because if there were any CPU-pinned counters
+                        * we should never get here; bp_cpuinfo::cpu_pinned must
+                        * be consistent with the global cpu_pinned histogram.
+                        */
+                       if (WARN_ON(atomic_read(&cpu_pinned[type].count[slot])))
+                               return true;
+               }
+       }
+
        return false;
 }