event->pmu->del(event, 0);
        event->oncpu = -1;
 
-       if (event->pending_disable) {
-               event->pending_disable = 0;
+       if (READ_ONCE(event->pending_disable) >= 0) {
+               WRITE_ONCE(event->pending_disable, -1);
                state = PERF_EVENT_STATE_OFF;
        }
        perf_event_set_state(event, state);
 
 void perf_event_disable_inatomic(struct perf_event *event)
 {
-       event->pending_disable = 1;
+       WRITE_ONCE(event->pending_disable, smp_processor_id());
+       /* can fail, see perf_pending_event_disable() */
        irq_work_queue(&event->pending);
 }
 
        }
 }
 
+static void perf_pending_event_disable(struct perf_event *event)
+{
+       int cpu = READ_ONCE(event->pending_disable);
+
+       if (cpu < 0)
+               return;
+
+       if (cpu == smp_processor_id()) {
+               WRITE_ONCE(event->pending_disable, -1);
+               perf_event_disable_local(event);
+               return;
+       }
+
+       /*
+        *  CPU-A                       CPU-B
+        *
+        *  perf_event_disable_inatomic()
+        *    @pending_disable = CPU-A;
+        *    irq_work_queue();
+        *
+        *  sched-out
+        *    @pending_disable = -1;
+        *
+        *                              sched-in
+        *                              perf_event_disable_inatomic()
+        *                                @pending_disable = CPU-B;
+        *                                irq_work_queue(); // FAILS
+        *
+        *  irq_work_run()
+        *    perf_pending_event()
+        *
+        * But the event runs on CPU-B and wants disabling there.
+        */
+       irq_work_queue_on(&event->pending, cpu);
+}
+
 static void perf_pending_event(struct irq_work *entry)
 {
-       struct perf_event *event = container_of(entry,
-                       struct perf_event, pending);
+       struct perf_event *event = container_of(entry, struct perf_event, pending);
        int rctx;
 
        rctx = perf_swevent_get_recursion_context();
         * and we won't recurse 'further'.
         */
 
-       if (event->pending_disable) {
-               event->pending_disable = 0;
-               perf_event_disable_local(event);
-       }
+       perf_pending_event_disable(event);
 
        if (event->pending_wakeup) {
                event->pending_wakeup = 0;
 
 
        init_waitqueue_head(&event->waitq);
+       event->pending_disable = -1;
        init_irq_work(&event->pending, perf_pending_event);
 
        mutex_init(&event->mmap_mutex);
 
                 * store that will be enabled on successful return
                 */
                if (!handle->size) { /* A, matches D */
-                       event->pending_disable = 1;
+                       event->pending_disable = smp_processor_id();
                        perf_output_wakeup(handle);
                        local_set(&rb->aux_nest, 0);
                        goto err_put;
 
        if (wakeup) {
                if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
-                       handle->event->pending_disable = 1;
+                       handle->event->pending_disable = smp_processor_id();
                perf_output_wakeup(handle);
        }