        intel_engine_add_retire(engine, tl);
 }
 
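+/*
+ * Called with b->irq_lock held. Drops the request from breadcrumb
+ * tracking (I915_FENCE_FLAG_SIGNAL), marks the fence as signaled and,
+ * if we won the race to signal it, takes a reference and queues the
+ * request onto @signals for the caller to run the dma-fence callbacks
+ * once the lock has been dropped.
+ */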
+static void __signal_request(struct i915_request *rq, struct list_head *signals)
+{
+       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
+       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+       if (!__dma_fence_signal(&rq->fence))
+               return;
+
+       i915_request_get(rq);
+       list_add_tail(&rq->signal_link, signals);
+}
+
 static void signal_irq_work(struct irq_work *work)
 {
        struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);

@@ ... @@ static void signal_irq_work(struct irq_work *work)
        if (b->irq_armed && list_empty(&b->signalers))
                __intel_breadcrumbs_disarm_irq(b);
 
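+       /*
+        * Pick up any requests that were already signaled on our behalf
+        * (e.g. stale breadcrumbs transferred from a virtual engine
+        * sibling), so that their callbacks are run below as well.
+        */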
+       list_splice_init(&b->signaled_requests, &signal);
+
        list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
                GEM_BUG_ON(list_empty(&ce->signals));
 
@@ ... @@ static void signal_irq_work(struct irq_work *work)
                                list_entry(pos, typeof(*rq), signal_link);
 
                        GEM_BUG_ON(!check_signal_order(ce, rq));
-
                        if (!__request_completed(rq))
                                break;
 
-                       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
-                                            &rq->fence.flags));
-                       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
-
-                       if (!__dma_fence_signal(&rq->fence))
-                               continue;
-
                        /*
                         * Queue for execution after dropping the signaling
                         * spinlock as the callback chain may end up adding
                         * more signalers to the same context or engine.
                         */
-                       i915_request_get(rq);
-                       list_add_tail(&rq->signal_link, &signal);
+                       __signal_request(rq, &signal);
                }
 
                /*
 
@@ ... @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
        spin_lock_init(&b->irq_lock);
        INIT_LIST_HEAD(&b->signalers);
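+       /* requests already signaled, awaiting delivery by our irq_work */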
+       INIT_LIST_HEAD(&b->signaled_requests);
 
        init_irq_work(&b->irq_work, signal_irq_work);
 }

@@ ... @@
        spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
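+/*
+ * Detach the completed breadcrumbs that @ce still has queued on @engine,
+ * signal them under b->irq_lock, and hand them to @engine's irq_work to
+ * run the dma-fence callbacks, mirroring the normal signaling path.
+ */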
+void intel_engine_transfer_stale_breadcrumbs(struct intel_engine_cs *engine,
+                                            struct intel_context *ce)
+{
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       unsigned long flags;
+
+       spin_lock_irqsave(&b->irq_lock, flags);
+       if (!list_empty(&ce->signals)) {
+               struct i915_request *rq, *next;
+
+               /* Queue the requests for the irq_work to run their signal callbacks */
+               list_for_each_entry_safe(rq, next, &ce->signals, signal_link) {
+                       GEM_BUG_ON(rq->engine != engine);
+                       GEM_BUG_ON(!__request_completed(rq));
+
+                       __signal_request(rq, &b->signaled_requests);
+               }
+
+               INIT_LIST_HEAD(&ce->signals);
+               list_del_init(&ce->signal_link);
+
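+               /* Kick the irq_work to run the fence callbacks promptly */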
+               irq_work_queue(&b->irq_work);
+       }
+       spin_unlock_irqrestore(&b->irq_lock, flags);
+}
+
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
 }
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ ... @@ static bool virtual_matches(const struct virtual_engine *ve,
        return true;
 }
 
-static void virtual_xfer_breadcrumbs(struct virtual_engine *ve,
-                                    struct i915_request *rq)
+static void virtual_xfer_breadcrumbs(struct virtual_engine *ve)
 {
-       struct intel_engine_cs *old = ve->siblings[0];
-
-       /* All unattached (rq->engine == old) must already be completed */
-
-       spin_lock(&old->breadcrumbs.irq_lock);
-       if (!list_empty(&ve->context.signal_link)) {
-               list_del_init(&ve->context.signal_link);
-
-               /*
-                * We cannot acquire the new engine->breadcrumbs.irq_lock
-                * (as we are holding a breadcrumbs.irq_lock already),
-                * so attach this request to the signaler on submission.
-                * The queued irq_work will occur when we finally drop
-                * the engine->active.lock after dequeue.
-                */
-               set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags);
-
-               /* Also transfer the pending irq_work for the old breadcrumb. */
-               intel_engine_signal_breadcrumbs(rq->engine);
-       }
-       spin_unlock(&old->breadcrumbs.irq_lock);
+       /*
+        * All the outstanding signals on ve->siblings[0] must have been
+        * completed, and are merely awaiting delivery by the interrupt
+        * handler. As those signals still refer to the old sibling (via
+        * rq->engine), we must transfer them to the old sibling's
+        * irq_worker to keep our locking consistent.
+        */
+       intel_engine_transfer_stale_breadcrumbs(ve->siblings[0], &ve->context);
 }
 
 #define for_each_waiter(p__, rq__) \

@@ ... @@ static void execlists_dequeue(struct intel_engine_cs *engine)
                                                                        engine);
 
                                if (!list_empty(&ve->context.signals))
-                                       virtual_xfer_breadcrumbs(ve, rq);
+                                       virtual_xfer_breadcrumbs(ve);
 
                                /*
                                 * Move the bound engine to the top of the list