seq_printf(m, "GT active? %s\n", yesno(dev_priv->gt.awake));
 
        for_each_engine(engine, dev_priv, id) {
-               struct intel_breadcrumbs *b = &engine->breadcrumbs;
-               struct rb_node *rb;
-
                seq_printf(m, "%s:\n", engine->name);
                seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
                           engine->hangcheck.seqno, seqno[id],
                           intel_engine_last_submit(engine),
                           jiffies_to_msecs(jiffies -
                                            engine->hangcheck.action_timestamp));
-               seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
-                          yesno(intel_engine_has_waiter(engine)),
+               seq_printf(m, "\tfake irq active? %s\n",
                           yesno(test_bit(engine->id,
                                          &dev_priv->gpu_error.missed_irq_rings)));
 
-               spin_lock_irq(&b->rb_lock);
-               for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-                       struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-                       seq_printf(m, "\t%s [%d] waiting for %x\n",
-                                  w->tsk->comm, w->tsk->pid, w->seqno);
-               }
-               spin_unlock_irq(&b->rb_lock);
-
                seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
                           (long long)engine->hangcheck.acthd,
                           (long long)acthd[id]);
        return 0;
 }
 
-static int count_irq_waiters(struct drm_i915_private *i915)
-{
-       struct intel_engine_cs *engine;
-       enum intel_engine_id id;
-       int count = 0;
-
-       for_each_engine(engine, i915, id)
-               count += intel_engine_has_waiter(engine);
-
-       return count;
-}
-
 static const char *rps_power_to_str(unsigned int power)
 {
        static const char * const strings[] = {
        seq_printf(m, "RPS enabled? %d\n", rps->enabled);
        seq_printf(m, "GPU busy? %s [%d requests]\n",
                   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
-       seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv));
        seq_printf(m, "Boosts outstanding? %d\n",
                   atomic_read(&rps->num_waiters));
        seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
 
                   struct intel_engine_cs *engine)
 {
        ce->gem_context = ctx;
+
+       INIT_LIST_HEAD(&ce->signal_link);
+       INIT_LIST_HEAD(&ce->signals);
 }
 
 static struct i915_gem_context *
 
        struct intel_context {
                struct i915_gem_context *gem_context;
                struct intel_engine_cs *active;
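+               /*
+                * Breadcrumb signaling: signal_link is this context's node in
+                * the engine's breadcrumbs.signalers list; signals holds the
+                * requests on this context awaiting signaling (seqno order).
+                */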
+               struct list_head signal_link;
+               struct list_head signals;
                struct i915_vma *state;
                struct intel_ring *ring;
                u32 *lrc_reg_state;
 
        if (!erq->seqno)
                return;
 
-       err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
+       err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x%s%s, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
                   prefix, erq->pid, erq->ban_score,
-                  erq->context, erq->seqno, erq->sched_attr.priority,
+                  erq->context, erq->seqno,
+                  test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                           &erq->flags) ? "!" : "",
+                  test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+                           &erq->flags) ? "+" : "",
+                  erq->sched_attr.priority,
                   jiffies_to_msecs(erq->jiffies - epoch),
                   erq->start, erq->head, erq->tail);
 }
        }
        err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
        err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
-       err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
        err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
        err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
        err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
                                                    error->epoch);
                }
 
-               if (IS_ERR(ee->waiters)) {
-                       err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
-                                  m->i915->engine[i]->name);
-               } else if (ee->num_waiters) {
-                       err_printf(m, "%s --- %d waiters\n",
-                                  m->i915->engine[i]->name,
-                                  ee->num_waiters);
-                       for (j = 0; j < ee->num_waiters; j++) {
-                               err_printf(m, " seqno 0x%08x for %s [%d]\n",
-                                          ee->waiters[j].seqno,
-                                          ee->waiters[j].comm,
-                                          ee->waiters[j].pid);
-                       }
-               }
-
                print_error_obj(m, m->i915->engine[i],
                                "ringbuffer", ee->ringbuffer);
 
                i915_error_object_free(ee->wa_ctx);
 
                kfree(ee->requests);
-               if (!IS_ERR_OR_NULL(ee->waiters))
-                       kfree(ee->waiters);
        }
 
        for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
                        I915_READ(RING_SYNC_2(engine->mmio_base));
 }
 
-static void error_record_engine_waiters(struct intel_engine_cs *engine,
-                                       struct drm_i915_error_engine *ee)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct drm_i915_error_waiter *waiter;
-       struct rb_node *rb;
-       int count;
-
-       ee->num_waiters = 0;
-       ee->waiters = NULL;
-
-       if (RB_EMPTY_ROOT(&b->waiters))
-               return;
-
-       if (!spin_trylock_irq(&b->rb_lock)) {
-               ee->waiters = ERR_PTR(-EDEADLK);
-               return;
-       }
-
-       count = 0;
-       for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
-               count++;
-       spin_unlock_irq(&b->rb_lock);
-
-       waiter = NULL;
-       if (count)
-               waiter = kmalloc_array(count,
-                                      sizeof(struct drm_i915_error_waiter),
-                                      GFP_ATOMIC);
-       if (!waiter)
-               return;
-
-       if (!spin_trylock_irq(&b->rb_lock)) {
-               kfree(waiter);
-               ee->waiters = ERR_PTR(-EDEADLK);
-               return;
-       }
-
-       ee->waiters = waiter;
-       for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-               struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-               strcpy(waiter->comm, w->tsk->comm);
-               waiter->pid = w->tsk->pid;
-               waiter->seqno = w->seqno;
-               waiter++;
-
-               if (++ee->num_waiters == count)
-                       break;
-       }
-       spin_unlock_irq(&b->rb_lock);
-}
-
 static void error_record_engine_registers(struct i915_gpu_state *error,
                                          struct intel_engine_cs *engine,
                                          struct drm_i915_error_engine *ee)
 
        intel_engine_get_instdone(engine, &ee->instdone);
 
-       ee->waiting = intel_engine_has_waiter(engine);
        ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
        ee->acthd = intel_engine_get_active_head(engine);
        ee->seqno = intel_engine_get_seqno(engine);
 {
        struct i915_gem_context *ctx = request->gem_context;
 
+       erq->flags = request->fence.flags;
        erq->context = ctx->hw_id;
        erq->sched_attr = request->sched.attr;
        erq->ban_score = atomic_read(&ctx->ban_score);
                ee->engine_id = i;
 
                error_record_engine_registers(error, engine, ee);
-               error_record_engine_waiters(engine, ee);
                error_record_engine_execlists(engine, ee);
 
                request = i915_gem_find_active_request(engine);
 
                int engine_id;
                /* Software tracked state */
                bool idle;
-               bool waiting;
-               int num_waiters;
                unsigned long hangcheck_timestamp;
                struct i915_address_space *vm;
                int num_requests;
                struct drm_i915_error_object *default_state;
 
                struct drm_i915_error_request {
+                       unsigned long flags;
                        long jiffies;
                        pid_t pid;
                        u32 context;
                } *requests, execlist[EXECLIST_MAX_PORTS];
                unsigned int num_ports;
 
-               struct drm_i915_error_waiter {
-                       char comm[TASK_COMM_LEN];
-                       pid_t pid;
-                       u32 seqno;
-               } *waiters;
-
                struct {
                        u32 gfx_mode;
                        union {
 
        return;
 }
 
-static void notify_ring(struct intel_engine_cs *engine)
-{
-       const u32 seqno = intel_engine_get_seqno(engine);
-       struct i915_request *rq = NULL;
-       struct task_struct *tsk = NULL;
-       struct intel_wait *wait;
-
-       if (unlikely(!engine->breadcrumbs.irq_armed))
-               return;
-
-       rcu_read_lock();
-
-       spin_lock(&engine->breadcrumbs.irq_lock);
-       wait = engine->breadcrumbs.irq_wait;
-       if (wait) {
-               /*
-                * We use a callback from the dma-fence to submit
-                * requests after waiting on our own requests. To
-                * ensure minimum delay in queuing the next request to
-                * hardware, signal the fence now rather than wait for
-                * the signaler to be woken up. We still wake up the
-                * waiter in order to handle the irq-seqno coherency
-                * issues (we may receive the interrupt before the
-                * seqno is written, see __i915_request_irq_complete())
-                * and to handle coalescing of multiple seqno updates
-                * and many waiters.
-                */
-               if (i915_seqno_passed(seqno, wait->seqno)) {
-                       struct i915_request *waiter = wait->request;
-
-                       if (waiter &&
-                           !i915_request_signaled(waiter) &&
-                           intel_wait_check_request(wait, waiter))
-                               rq = i915_request_get(waiter);
-
-                       tsk = wait->tsk;
-               }
-
-               engine->breadcrumbs.irq_count++;
-       } else {
-               if (engine->breadcrumbs.irq_armed)
-                       __intel_engine_disarm_breadcrumbs(engine);
-       }
-       spin_unlock(&engine->breadcrumbs.irq_lock);
-
-       if (rq) {
-               spin_lock(&rq->lock);
-               dma_fence_signal_locked(&rq->fence);
-               GEM_BUG_ON(!i915_request_completed(rq));
-               spin_unlock(&rq->lock);
-
-               i915_request_put(rq);
-       }
-
-       if (tsk && tsk->state & TASK_NORMAL)
-               wake_up_process(tsk);
-
-       rcu_read_unlock();
-}
-
 static void vlv_c0_read(struct drm_i915_private *dev_priv,
                        struct intel_rps_ei *ei)
 {
                               u32 gt_iir)
 {
        if (gt_iir & GT_RENDER_USER_INTERRUPT)
-               notify_ring(dev_priv->engine[RCS]);
+               intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
        if (gt_iir & ILK_BSD_USER_INTERRUPT)
-               notify_ring(dev_priv->engine[VCS]);
+               intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
 }
 
 static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
                               u32 gt_iir)
 {
        if (gt_iir & GT_RENDER_USER_INTERRUPT)
-               notify_ring(dev_priv->engine[RCS]);
+               intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
        if (gt_iir & GT_BSD_USER_INTERRUPT)
-               notify_ring(dev_priv->engine[VCS]);
+               intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
        if (gt_iir & GT_BLT_USER_INTERRUPT)
-               notify_ring(dev_priv->engine[BCS]);
+               intel_engine_breadcrumbs_irq(dev_priv->engine[BCS]);
 
        if (gt_iir & (GT_BLT_CS_ERROR_INTERRUPT |
                      GT_BSD_CS_ERROR_INTERRUPT |
                tasklet = true;
 
        if (iir & GT_RENDER_USER_INTERRUPT) {
-               notify_ring(engine);
+               intel_engine_breadcrumbs_irq(engine);
                tasklet |= USES_GUC_SUBMISSION(engine->i915);
        }
 
 
        if (HAS_VEBOX(dev_priv)) {
                if (pm_iir & PM_VEBOX_USER_INTERRUPT)
-                       notify_ring(dev_priv->engine[VECS]);
+                       intel_engine_breadcrumbs_irq(dev_priv->engine[VECS]);
 
                if (pm_iir & PM_VEBOX_CS_ERROR_INTERRUPT)
                        DRM_DEBUG("Command parser error, pm_iir 0x%08x\n", pm_iir);
                I915_WRITE16(IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(dev_priv->engine[RCS]);
+                       intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i8xx_error_irq_handler(dev_priv, eir, eir_stuck);
                I915_WRITE(IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(dev_priv->engine[RCS]);
+                       intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
                I915_WRITE(IIR, iir);
 
                if (iir & I915_USER_INTERRUPT)
-                       notify_ring(dev_priv->engine[RCS]);
+                       intel_engine_breadcrumbs_irq(dev_priv->engine[RCS]);
 
                if (iir & I915_BSD_USER_INTERRUPT)
-                       notify_ring(dev_priv->engine[VCS]);
+                       intel_engine_breadcrumbs_irq(dev_priv->engine[VCS]);
 
                if (iir & I915_MASTER_ERROR_INTERRUPT)
                        i9xx_error_irq_handler(dev_priv, eir, eir_stuck);
 
 
 static bool i915_fence_enable_signaling(struct dma_fence *fence)
 {
-       return intel_engine_enable_signaling(to_request(fence), true);
+       return i915_request_enable_breadcrumb(to_request(fence));
 }
 
 static signed long i915_fence_wait(struct dma_fence *fence,
        if (!i915_request_signaled(rq))
                dma_fence_signal_locked(&rq->fence);
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
-               intel_engine_cancel_signaling(rq);
+               i915_request_cancel_breadcrumb(rq);
        if (rq->waitboost) {
                GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
                atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
 
        /* We may be recursing from the signal callback of another i915 fence */
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
+       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+       set_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
        request->global_seqno = seqno;
-       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               intel_engine_enable_signaling(request, false);
+       if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) &&
+           !i915_request_enable_breadcrumb(request))
+               intel_engine_queue_breadcrumbs(engine);
        spin_unlock(&request->lock);
 
        engine->emit_fini_breadcrumb(request,
        move_to_timeline(request, &engine->timeline);
 
        trace_i915_request_execute(request);
-
-       wake_up_all(&request->execute);
 }
 
 void i915_request_submit(struct i915_request *request)
        spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
        request->global_seqno = 0;
        if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-               intel_engine_cancel_signaling(request);
+               i915_request_cancel_breadcrumb(request);
+       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags));
+       clear_bit(I915_FENCE_FLAG_ACTIVE, &request->fence.flags);
        spin_unlock(&request->lock);
 
        /* Transfer back from the global per-engine timeline to per-context */
 
        /* We bump the ref for the fence chain */
        i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
-       init_waitqueue_head(&rq->execute);
 
        i915_sched_node_init(&rq->sched);
 
        /* No zalloc, must clear what we need by hand */
        rq->global_seqno = 0;
-       rq->signaling.wait.seqno = 0;
        rq->file_priv = NULL;
        rq->batch = NULL;
        rq->capture_list = NULL;
        return this_cpu != cpu;
 }
 
-static bool __i915_spin_request(const struct i915_request *rq,
-                               u32 seqno, int state, unsigned long timeout_us)
+static bool __i915_spin_request(const struct i915_request * const rq,
+                               int state, unsigned long timeout_us)
 {
-       struct intel_engine_cs *engine = rq->engine;
-       unsigned int irq, cpu;
-
-       GEM_BUG_ON(!seqno);
+       unsigned int cpu;
 
        /*
         * Only wait for the request if we know it is likely to complete.
         * We don't track the timestamps around requests, nor the average
         * request length, so we do not have a good indicator that this
         * request will complete within the timeout. What we do know is the
-        * order in which requests are executed by the engine and so we can
-        * tell if the request has started. If the request hasn't started yet,
-        * it is a fair assumption that it will not complete within our
-        * relatively short timeout.
+        * order in which requests are executed by the context and so we can
+        * tell if the request has been started. If the request is not even
+        * running yet, it is a fair assumption that it will not complete
+        * within our relatively short timeout.
         */
-       if (!intel_engine_has_started(engine, seqno))
+       if (!i915_request_is_running(rq))
                return false;
 
        /*
         * takes to sleep on a request, on the order of a microsecond.
         */
 
-       irq = READ_ONCE(engine->breadcrumbs.irq_count);
        timeout_us += local_clock_us(&cpu);
        do {
-               if (intel_engine_has_completed(engine, seqno))
-                       return seqno == i915_request_global_seqno(rq);
-
-               /*
-                * Seqno are meant to be ordered *before* the interrupt. If
-                * we see an interrupt without a corresponding seqno advance,
-                * assume we won't see one in the near future but require
-                * the engine->seqno_barrier() to fixup coherency.
-                */
-               if (READ_ONCE(engine->breadcrumbs.irq_count) != irq)
-                       break;
+               if (i915_request_completed(rq))
+                       return true;
 
                if (signal_pending_state(state, current))
                        break;
        return false;
 }
 
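+/*
+ * i915_request_wait() bolts this onto the request's dma-fence: we record
+ * the sleeping task so that request_wait_wake() can wake it when the
+ * fence signals.
+ */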
+struct request_wait {
+       struct dma_fence_cb cb;
+       struct task_struct *tsk;
+};
+
+static void request_wait_wake(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+       struct request_wait *wait = container_of(cb, typeof(*wait), cb);
+
+       wake_up_process(wait->tsk);
+}
+
 /**
  * i915_request_wait - wait until execution of request has finished
  * @rq: the request to wait upon
 {
        const int state = flags & I915_WAIT_INTERRUPTIBLE ?
                TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-       DEFINE_WAIT_FUNC(exec, default_wake_function);
-       struct intel_wait wait;
+       struct request_wait wait;
 
        might_sleep();
        GEM_BUG_ON(timeout < 0);
                return -ETIME;
 
        trace_i915_request_wait_begin(rq, flags);
-       add_wait_queue(&rq->execute, &exec);
-       intel_wait_init(&wait);
-       if (flags & I915_WAIT_PRIORITY)
-               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
-
-restart:
-       do {
-               set_current_state(state);
-               if (intel_wait_update_request(&wait, rq))
-                       break;
-
-               if (signal_pending_state(state, current)) {
-                       timeout = -ERESTARTSYS;
-                       goto complete;
-               }
 
-               if (!timeout) {
-                       timeout = -ETIME;
-                       goto complete;
-               }
+       /* Optimistic short spin before touching IRQs */
+       if (__i915_spin_request(rq, state, 5))
+               goto out;
 
-               timeout = io_schedule_timeout(timeout);
-       } while (1);
+       if (flags & I915_WAIT_PRIORITY)
+               i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
 
-       GEM_BUG_ON(!intel_wait_has_seqno(&wait));
-       GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+       wait.tsk = current;
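+       /* dma_fence_add_callback() fails only if the fence is already signaled */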
+       if (dma_fence_add_callback(&rq->fence, &wait.cb, request_wait_wake))
+               goto out;
 
-       /* Optimistic short spin before touching IRQs */
-       if (__i915_spin_request(rq, wait.seqno, state, 5))
-               goto complete;
+       for (;;) {
+               set_current_state(state);
 
-       set_current_state(state);
-       if (intel_engine_add_wait(rq->engine, &wait))
-               /*
-                * In order to check that we haven't missed the interrupt
-                * as we enabled it, we need to kick ourselves to do a
-                * coherent check on the seqno before we sleep.
-                */
-               goto wakeup;
+               if (i915_request_completed(rq))
+                       break;
 
-       for (;;) {
                if (signal_pending_state(state, current)) {
                        timeout = -ERESTARTSYS;
                        break;
                }
 
                timeout = io_schedule_timeout(timeout);
-
-               if (intel_wait_complete(&wait) &&
-                   intel_wait_check_request(&wait, rq))
-                       break;
-
-               set_current_state(state);
-
-wakeup:
-               if (i915_request_completed(rq))
-                       break;
-
-               /* Only spin if we know the GPU is processing this request */
-               if (__i915_spin_request(rq, wait.seqno, state, 2))
-                       break;
-
-               if (!intel_wait_check_request(&wait, rq)) {
-                       intel_engine_remove_wait(rq->engine, &wait);
-                       goto restart;
-               }
        }
-
-       intel_engine_remove_wait(rq->engine, &wait);
-complete:
        __set_current_state(TASK_RUNNING);
-       remove_wait_queue(&rq->execute, &exec);
-       trace_i915_request_wait_end(rq);
 
+       dma_fence_remove_callback(&rq->fence, &wait.cb);
+
+out:
+       trace_i915_request_wait_end(rq);
        return timeout;
 }
 
 
 struct i915_request;
 struct i915_timeline;
 
-struct intel_wait {
-       struct rb_node node;
-       struct task_struct *tsk;
-       struct i915_request *request;
-       u32 seqno;
-};
-
-struct intel_signal_node {
-       struct intel_wait wait;
-       struct list_head link;
-};
-
 struct i915_capture_list {
        struct i915_capture_list *next;
        struct i915_vma *vma;
 };
 
+enum {
+       /*
+        * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW.
+        *
+        * Set by __i915_request_submit() on handing over to HW, and cleared
+        * by __i915_request_unsubmit() if we preempt this request.
+        *
+        * Finally cleared for consistency on retiring the request, when
+        * we know the HW is no longer running this request.
+        *
+        * See i915_request_is_active()
+        */
+       I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,
+
+       /*
+        * I915_FENCE_FLAG_SIGNAL - this request is currently on signal_list
+        *
+        * Internal bookkeeping used by the breadcrumb code to track when
+        * a request is on one of the per-context signal lists (ce->signals).
+        */
+       I915_FENCE_FLAG_SIGNAL,
+};
+
 /**
  * Request queue structure.
  *
        struct intel_context *hw_context;
        struct intel_ring *ring;
        struct i915_timeline *timeline;
-       struct intel_signal_node signaling;
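+       /* Entry on hw_context->signals while awaiting breadcrumb signaling */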
+       struct list_head signal_link;
 
        /*
         * The rcu epoch of when this request was allocated. Used to judiciously
         */
        struct i915_sw_fence submit;
        wait_queue_entry_t submitq;
-       wait_queue_head_t execute;
 
        /*
         * A list of everyone we wait upon, and everyone who waits upon us.
  * that it has passed the global seqno and the global seqno is unchanged
  * after the read, it is indeed complete).
  */
-static u32
+static inline u32
 i915_request_global_seqno(const struct i915_request *request)
 {
        return READ_ONCE(request->global_seqno);
 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
 
+/* Note: part of the intel_breadcrumbs family */
+bool i915_request_enable_breadcrumb(struct i915_request *request);
+void i915_request_cancel_breadcrumb(struct i915_request *request);
+
 long i915_request_wait(struct i915_request *rq,
                       unsigned int flags,
                       long timeout)
        return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
 }
 
+static inline bool i915_request_is_active(const struct i915_request *rq)
+{
+       return test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);
+}
+
 /**
  * Returns true if seq1 is later than seq2.
  */
        return seqno;
 }
 
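+/*
+ * The timeline's HWSP tracks the seqno of the last completed breadcrumb
+ * (compare i915_request_completed()), so once it has passed
+ * fence.seqno - 1 our predecessor has finished and this request has begun.
+ */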
+static inline bool __i915_request_has_started(const struct i915_request *rq)
+{
+       return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
+}
+
 /**
  * i915_request_started - check if the request has begun being executed
  * @rq: the request
                return true;
 
        /* Remember: started but may have since been preempted! */
-       return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
+       return __i915_request_has_started(rq);
+}
+
+/**
+ * i915_request_is_running - check if the request may actually be executing
+ * @rq: the request
+ *
+ * Returns true if the request is currently submitted to hardware and has
+ * passed its start point (i.e. the context is set up and not busywaiting).
+ * Note that it may no longer be running by the time the function returns!
+ */
+static inline bool i915_request_is_running(const struct i915_request *rq)
+{
+       if (!i915_request_is_active(rq))
+               return false;
+
+       return __i915_request_has_started(rq);
 }
 
 static inline bool i915_request_completed(const struct i915_request *rq)
 
 
        spin_lock(&timeline->lock);
 
-       if (rq->global_seqno) {
+       if (i915_request_is_active(rq)) {
                list_for_each_entry_continue(rq,
                                             &engine->timeline.requests, link)
                        if (rq->gem_context == hung_ctx)
 
 static void nop_submit_request(struct i915_request *request)
 {
+       struct intel_engine_cs *engine = request->engine;
        unsigned long flags;
 
        GEM_TRACE("%s fence %llx:%lld -> -EIO\n",
-                 request->engine->name,
-                 request->fence.context, request->fence.seqno);
+                 engine->name, request->fence.context, request->fence.seqno);
        dma_fence_set_error(&request->fence, -EIO);
 
-       spin_lock_irqsave(&request->engine->timeline.lock, flags);
+       spin_lock_irqsave(&engine->timeline.lock, flags);
        __i915_request_submit(request);
        i915_request_mark_complete(request);
-       intel_engine_write_global_seqno(request->engine, request->global_seqno);
-       spin_unlock_irqrestore(&request->engine->timeline.lock, flags);
+       intel_engine_write_global_seqno(engine, request->global_seqno);
+       spin_unlock_irqrestore(&engine->timeline.lock, flags);
+
+       intel_engine_queue_breadcrumbs(engine);
 }
 
 void i915_gem_set_wedged(struct drm_i915_private *i915)
 
        for_each_engine(engine, i915, id) {
                reset_finish_engine(engine);
-               intel_engine_wakeup(engine);
+               intel_engine_signal_breadcrumbs(engine);
        }
 
        smp_mb__before_atomic();
 
 {
        const struct i915_request *active;
 
-       if (!rq->global_seqno)
+       if (!i915_request_is_active(rq))
                return false;
 
        active = port_request(engine->execlists.port);
 
 
 #define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_rq)
 
-static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
+static void irq_enable(struct intel_engine_cs *engine)
+{
+       if (!engine->irq_enable)
+               return;
+
+       /* Caller disables interrupts */
+       spin_lock(&engine->i915->irq_lock);
+       engine->irq_enable(engine);
+       spin_unlock(&engine->i915->irq_lock);
+}
+
+static void irq_disable(struct intel_engine_cs *engine)
 {
-       struct intel_wait *wait;
-       unsigned int result = 0;
+       if (!engine->irq_disable)
+               return;
+
+       /* Caller disables interrupts */
+       spin_lock(&engine->i915->irq_lock);
+       engine->irq_disable(engine);
+       spin_unlock(&engine->i915->irq_lock);
+}
 
+static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
+{
        lockdep_assert_held(&b->irq_lock);
 
-       wait = b->irq_wait;
-       if (wait) {
+       GEM_BUG_ON(!b->irq_enabled);
+       if (!--b->irq_enabled)
+               irq_disable(container_of(b,
+                                        struct intel_engine_cs,
+                                        breadcrumbs));
+
+       b->irq_armed = false;
+}
+
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
+{
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+       if (!b->irq_armed)
+               return;
+
+       spin_lock_irq(&b->irq_lock);
+       if (b->irq_armed)
+               __intel_breadcrumbs_disarm_irq(b);
+       spin_unlock_irq(&b->irq_lock);
+}
+
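+/* Has this request's breadcrumb (fence.seqno) been written to its HWSP? */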
+static inline bool __request_completed(const struct i915_request *rq)
+{
+       return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno);
+}
+
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine)
+{
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       struct intel_context *ce, *cn;
+       struct list_head *pos, *next;
+       LIST_HEAD(signal);
+
+       spin_lock(&b->irq_lock);
+
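+       /* Note the interrupt so hangcheck/fake-irq can spot missed breadcrumbs */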
+       b->irq_fired = true;
+       if (b->irq_armed && list_empty(&b->signalers))
+               __intel_breadcrumbs_disarm_irq(b);
+
+       list_for_each_entry_safe(ce, cn, &b->signalers, signal_link) {
+               GEM_BUG_ON(list_empty(&ce->signals));
+
+               list_for_each_safe(pos, next, &ce->signals) {
+                       struct i915_request *rq =
+                               list_entry(pos, typeof(*rq), signal_link);
+
+                       if (!__request_completed(rq))
+                               break;
+
+                       GEM_BUG_ON(!test_bit(I915_FENCE_FLAG_SIGNAL,
+                                            &rq->fence.flags));
+                       clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+
+                       /*
+                        * We may race with direct invocation of
+                        * dma_fence_signal(), e.g. i915_request_retire(),
+                        * in which case we can skip processing it ourselves.
+                        */
+                       if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                    &rq->fence.flags))
+                               continue;
+
+                       /*
+                        * Queue for execution after dropping the signaling
+                        * spinlock as the callback chain may end up adding
+                        * more signalers to the same context or engine.
+                        */
+                       i915_request_get(rq);
+                       list_add_tail(&rq->signal_link, &signal);
+               }
+
                /*
-                * N.B. Since task_asleep() and ttwu are not atomic, the
-                * waiter may actually go to sleep after the check, causing
-                * us to suppress a valid wakeup. We prefer to reduce the
-                * number of false positive missed_breadcrumb() warnings
-                * at the expense of a few false negatives, as it it easy
-                * to trigger a false positive under heavy load. Enough
-                * signal should remain from genuine missed_breadcrumb()
-                * for us to detect in CI.
+                * We process the list deletion in bulk, only using a list_add
+                * (not list_move) above but keeping the status of
+                * rq->signal_link known with the I915_FENCE_FLAG_SIGNAL bit.
                 */
-               bool was_asleep = task_asleep(wait->tsk);
+               if (!list_is_first(pos, &ce->signals)) {
+                       /* Advance the list to the first incomplete request */
+                       __list_del_many(&ce->signals, pos);
+                       if (&ce->signals == pos) /* now empty */
+                               list_del_init(&ce->signal_link);
+               }
+       }
+
+       spin_unlock(&b->irq_lock);
+
+       list_for_each_safe(pos, next, &signal) {
+               struct i915_request *rq =
+                       list_entry(pos, typeof(*rq), signal_link);
 
-               result = ENGINE_WAKEUP_WAITER;
-               if (wake_up_process(wait->tsk) && was_asleep)
-                       result |= ENGINE_WAKEUP_ASLEEP;
+               dma_fence_signal(&rq->fence);
+               i915_request_put(rq);
        }
 
-       return result;
+       return !list_empty(&signal);
 }
 
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine)
 {
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       unsigned long flags;
-       unsigned int result;
+       bool result;
 
-       spin_lock_irqsave(&b->irq_lock, flags);
-       result = __intel_breadcrumbs_wakeup(b);
-       spin_unlock_irqrestore(&b->irq_lock, flags);
+       local_irq_disable();
+       result = intel_engine_breadcrumbs_irq(engine);
+       local_irq_enable();
 
        return result;
 }
 
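+/*
+ * Deferred breadcrumb signaling: run intel_engine_breadcrumbs_irq() from
+ * irq_work context (hooked up via init_irq_work() in
+ * intel_engine_init_breadcrumbs()).
+ */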
+static void signal_irq_work(struct irq_work *work)
+{
+       struct intel_engine_cs *engine =
+               container_of(work, typeof(*engine), breadcrumbs.irq_work);
+
+       intel_engine_breadcrumbs_irq(engine);
+}
+
 static unsigned long wait_timeout(void)
 {
        return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
        struct intel_engine_cs *engine =
                from_timer(engine, t, breadcrumbs.hangcheck);
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       unsigned int irq_count;
 
        if (!b->irq_armed)
                return;
 
-       irq_count = READ_ONCE(b->irq_count);
-       if (b->hangcheck_interrupts != irq_count) {
-               b->hangcheck_interrupts = irq_count;
-               mod_timer(&b->hangcheck, wait_timeout());
-               return;
-       }
+       if (b->irq_fired)
+               goto rearm;
 
-       /* We keep the hangcheck timer alive until we disarm the irq, even
+       /*
+        * We keep the hangcheck timer alive until we disarm the irq, even
         * if there are no waiters at present.
         *
         * If the waiter was currently running, assume it hasn't had a chance
         * but we still have a waiter. Assuming all batches complete within
         * DRM_I915_HANGCHECK_JIFFIES [1.5s]!
         */
-       if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) {
+       synchronize_hardirq(engine->i915->drm.irq);
+       if (intel_engine_signal_breadcrumbs(engine)) {
                missed_breadcrumb(engine);
                mod_timer(&b->fake_irq, jiffies + 1);
        } else {
+rearm:
+               b->irq_fired = false;
                mod_timer(&b->hangcheck, wait_timeout());
        }
 }
         * oldest waiter to do the coherent seqno check.
         */
 
-       spin_lock_irq(&b->irq_lock);
-       if (b->irq_armed && !__intel_breadcrumbs_wakeup(b))
-               __intel_engine_disarm_breadcrumbs(engine);
-       spin_unlock_irq(&b->irq_lock);
-       if (!b->irq_armed)
+       if (!intel_engine_signal_breadcrumbs(engine) && !b->irq_armed)
                return;
 
        /* If the user has disabled the fake-irq, restore the hangchecking */
        mod_timer(&b->fake_irq, jiffies + 1);
 }
 
-static void irq_enable(struct intel_engine_cs *engine)
-{
-       if (!engine->irq_enable)
-               return;
-
-       /* Caller disables interrupts */
-       spin_lock(&engine->i915->irq_lock);
-       engine->irq_enable(engine);
-       spin_unlock(&engine->i915->irq_lock);
-}
-
-static void irq_disable(struct intel_engine_cs *engine)
-{
-       if (!engine->irq_disable)
-               return;
-
-       /* Caller disables interrupts */
-       spin_lock(&engine->i915->irq_lock);
-       engine->irq_disable(engine);
-       spin_unlock(&engine->i915->irq_lock);
-}
-
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-       lockdep_assert_held(&b->irq_lock);
-       GEM_BUG_ON(b->irq_wait);
-       GEM_BUG_ON(!b->irq_armed);
-
-       GEM_BUG_ON(!b->irq_enabled);
-       if (!--b->irq_enabled)
-               irq_disable(engine);
-
-       b->irq_armed = false;
-}
-
 void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine)
 {
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
        spin_unlock_irq(&b->irq_lock);
 }
 
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct intel_wait *wait, *n;
-
-       if (!b->irq_armed)
-               return;
-
-       /*
-        * We only disarm the irq when we are idle (all requests completed),
-        * so if the bottom-half remains asleep, it missed the request
-        * completion.
-        */
-       if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP)
-               missed_breadcrumb(engine);
-
-       spin_lock_irq(&b->rb_lock);
-
-       spin_lock(&b->irq_lock);
-       b->irq_wait = NULL;
-       if (b->irq_armed)
-               __intel_engine_disarm_breadcrumbs(engine);
-       spin_unlock(&b->irq_lock);
-
-       rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) {
-               GEM_BUG_ON(!intel_engine_signaled(engine, wait->seqno));
-               RB_CLEAR_NODE(&wait->node);
-               wake_up_process(wait->tsk);
-       }
-       b->waiters = RB_ROOT;
-
-       spin_unlock_irq(&b->rb_lock);
-}
-
 static bool use_fake_irq(const struct intel_breadcrumbs *b)
 {
        const struct intel_engine_cs *engine =
         * engine->seqno_barrier(), a timing error that should be transient
         * and unlikely to reoccur.
         */
-       return READ_ONCE(b->irq_count) == b->hangcheck_interrupts;
+       return !b->irq_fired;
 }
 
 static void enable_fake_irq(struct intel_breadcrumbs *b)
                mod_timer(&b->hangcheck, wait_timeout());
 }
 
-static bool __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
+static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 {
        struct intel_engine_cs *engine =
                container_of(b, struct intel_engine_cs, breadcrumbs);
        return enabled;
 }
 
-static inline struct intel_wait *to_wait(struct rb_node *node)
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 {
-       return rb_entry(node, struct intel_wait, node);
-}
+       struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
-                                             struct intel_wait *wait)
-{
-       lockdep_assert_held(&b->rb_lock);
-       GEM_BUG_ON(b->irq_wait == wait);
+       spin_lock_init(&b->irq_lock);
+       INIT_LIST_HEAD(&b->signalers);
 
-       /*
-        * This request is completed, so remove it from the tree, mark it as
-        * complete, and *then* wake up the associated task. N.B. when the
-        * task wakes up, it will find the empty rb_node, discern that it
-        * has already been removed from the tree and skip the serialisation
-        * of the b->rb_lock and b->irq_lock. This means that the destruction
-        * of the intel_wait is not serialised with the interrupt handler
-        * by the waiter - it must instead be serialised by the caller.
-        */
-       rb_erase(&wait->node, &b->waiters);
-       RB_CLEAR_NODE(&wait->node);
+       init_irq_work(&b->irq_work, signal_irq_work);
 
-       if (wait->tsk->state != TASK_RUNNING)
-               wake_up_process(wait->tsk); /* implicit smp_wmb() */
+       timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
+       timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 }
 
-static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
-                                           struct rb_node *next)
+static void cancel_fake_irq(struct intel_engine_cs *engine)
 {
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
 
-       spin_lock(&b->irq_lock);
-       GEM_BUG_ON(!b->irq_armed);
-       GEM_BUG_ON(!b->irq_wait);
-       b->irq_wait = to_wait(next);
-       spin_unlock(&b->irq_lock);
-
-       /* We always wake up the next waiter that takes over as the bottom-half
-        * as we may delegate not only the irq-seqno barrier to the next waiter
-        * but also the task of waking up concurrent waiters.
-        */
-       if (next)
-               wake_up_process(to_wait(next)->tsk);
+       del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
+       del_timer_sync(&b->hangcheck);
+       clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
 
-static bool __intel_engine_add_wait(struct intel_engine_cs *engine,
-                                   struct intel_wait *wait)
+void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 {
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct rb_node **p, *parent, *completed;
-       bool first, armed;
-       u32 seqno;
+       unsigned long flags;
 
-       GEM_BUG_ON(!wait->seqno);
+       spin_lock_irqsave(&b->irq_lock, flags);
 
-       /* Insert the request into the retirement ordered list
-        * of waiters by walking the rbtree. If we are the oldest
-        * seqno in the tree (the first to be retired), then
-        * set ourselves as the bottom-half.
-        *
-        * As we descend the tree, prune completed branches since we hold the
-        * spinlock we know that the first_waiter must be delayed and can
-        * reduce some of the sequential wake up latency if we take action
-        * ourselves and wake up the completed tasks in parallel. Also, by
-        * removing stale elements in the tree, we may be able to reduce the
-        * ping-pong between the old bottom-half and ourselves as first-waiter.
+       /*
+        * Leave the fake_irq timer enabled (if it is running), but clear the
+        * bit so that it turns itself off on its next wake up and goes back
+        * to the long hangcheck interval if still required.
         */
-       armed = false;
-       first = true;
-       parent = NULL;
-       completed = NULL;
-       seqno = intel_engine_get_seqno(engine);
-
-        /* If the request completed before we managed to grab the spinlock,
-         * return now before adding ourselves to the rbtree. We let the
-         * current bottom-half handle any pending wakeups and instead
-         * try and get out of the way quickly.
-         */
-       if (i915_seqno_passed(seqno, wait->seqno)) {
-               RB_CLEAR_NODE(&wait->node);
-               return first;
-       }
-
-       p = &b->waiters.rb_node;
-       while (*p) {
-               parent = *p;
-               if (wait->seqno == to_wait(parent)->seqno) {
-                       /* We have multiple waiters on the same seqno, select
-                        * the highest priority task (that with the smallest
-                        * task->prio) to serve as the bottom-half for this
-                        * group.
-                        */
-                       if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
-                               p = &parent->rb_right;
-                               first = false;
-                       } else {
-                               p = &parent->rb_left;
-                       }
-               } else if (i915_seqno_passed(wait->seqno,
-                                            to_wait(parent)->seqno)) {
-                       p = &parent->rb_right;
-                       if (i915_seqno_passed(seqno, to_wait(parent)->seqno))
-                               completed = parent;
-                       else
-                               first = false;
-               } else {
-                       p = &parent->rb_left;
-               }
-       }
-       rb_link_node(&wait->node, parent, p);
-       rb_insert_color(&wait->node, &b->waiters);
-
-       if (first) {
-               spin_lock(&b->irq_lock);
-               b->irq_wait = wait;
-               /* After assigning ourselves as the new bottom-half, we must
-                * perform a cursory check to prevent a missed interrupt.
-                * Either we miss the interrupt whilst programming the hardware,
-                * or if there was a previous waiter (for a later seqno) they
-                * may be woken instead of us (due to the inherent race
-                * in the unlocked read of b->irq_seqno_bh in the irq handler)
-                * and so we miss the wake up.
-                */
-               armed = __intel_breadcrumbs_enable_irq(b);
-               spin_unlock(&b->irq_lock);
-       }
-
-       if (completed) {
-               /* Advance the bottom-half (b->irq_wait) before we wake up
-                * the waiters who may scribble over their intel_wait
-                * just as the interrupt handler is dereferencing it via
-                * b->irq_wait.
-                */
-               if (!first) {
-                       struct rb_node *next = rb_next(completed);
-                       GEM_BUG_ON(next == &wait->node);
-                       __intel_breadcrumbs_next(engine, next);
-               }
-
-               do {
-                       struct intel_wait *crumb = to_wait(completed);
-                       completed = rb_prev(completed);
-                       __intel_breadcrumbs_finish(b, crumb);
-               } while (completed);
-       }
-
-       GEM_BUG_ON(!b->irq_wait);
-       GEM_BUG_ON(!b->irq_armed);
-       GEM_BUG_ON(rb_first(&b->waiters) != &b->irq_wait->node);
-
-       return armed;
-}
-
-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-                          struct intel_wait *wait)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       bool armed;
-
-       spin_lock_irq(&b->rb_lock);
-       armed = __intel_engine_add_wait(engine, wait);
-       spin_unlock_irq(&b->rb_lock);
-       if (armed)
-               return armed;
-
-       /* Make the caller recheck if its request has already started. */
-       return intel_engine_has_started(engine, wait->seqno);
-}
-
-static inline bool chain_wakeup(struct rb_node *rb, int priority)
-{
-       return rb && to_wait(rb)->tsk->prio <= priority;
-}
+       clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 
-static inline int wakeup_priority(struct intel_breadcrumbs *b,
-                                 struct task_struct *tsk)
-{
-       if (tsk == b->signaler)
-               return INT_MIN;
+       if (b->irq_enabled)
+               irq_enable(engine);
        else
-               return tsk->prio;
-}
-
-static void __intel_engine_remove_wait(struct intel_engine_cs *engine,
-                                      struct intel_wait *wait)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-       lockdep_assert_held(&b->rb_lock);
-
-       if (RB_EMPTY_NODE(&wait->node))
-               goto out;
-
-       if (b->irq_wait == wait) {
-               const int priority = wakeup_priority(b, wait->tsk);
-               struct rb_node *next;
-
-               /* We are the current bottom-half. Find the next candidate,
-                * the first waiter in the queue on the remaining oldest
-                * request. As multiple seqnos may complete in the time it
-                * takes us to wake up and find the next waiter, we have to
-                * wake up that waiter for it to perform its own coherent
-                * completion check.
-                */
-               next = rb_next(&wait->node);
-               if (chain_wakeup(next, priority)) {
-                       /* If the next waiter is already complete,
-                        * wake it up and continue onto the next waiter. So
-                        * if have a small herd, they will wake up in parallel
-                        * rather than sequentially, which should reduce
-                        * the overall latency in waking all the completed
-                        * clients.
-                        *
-                        * However, waking up a chain adds extra latency to
-                        * the first_waiter. This is undesirable if that
-                        * waiter is a high priority task.
-                        */
-                       u32 seqno = intel_engine_get_seqno(engine);
-
-                       while (i915_seqno_passed(seqno, to_wait(next)->seqno)) {
-                               struct rb_node *n = rb_next(next);
-
-                               __intel_breadcrumbs_finish(b, to_wait(next));
-                               next = n;
-                               if (!chain_wakeup(next, priority))
-                                       break;
-                       }
-               }
-
-               __intel_breadcrumbs_next(engine, next);
-       } else {
-               GEM_BUG_ON(rb_first(&b->waiters) == &wait->node);
-       }
-
-       GEM_BUG_ON(RB_EMPTY_NODE(&wait->node));
-       rb_erase(&wait->node, &b->waiters);
-       RB_CLEAR_NODE(&wait->node);
+               irq_disable(engine);
 
-out:
-       GEM_BUG_ON(b->irq_wait == wait);
-       GEM_BUG_ON(rb_first(&b->waiters) !=
-                  (b->irq_wait ? &b->irq_wait->node : NULL));
+       spin_unlock_irqrestore(&b->irq_lock, flags);
 }
 
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-                             struct intel_wait *wait)
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-       /* Quick check to see if this waiter was already decoupled from
-        * the tree by the bottom-half to avoid contention on the spinlock
-        * by the herd.
-        */
-       if (RB_EMPTY_NODE(&wait->node)) {
-               GEM_BUG_ON(READ_ONCE(b->irq_wait) == wait);
-               return;
-       }
-
-       spin_lock_irq(&b->rb_lock);
-       __intel_engine_remove_wait(engine, wait);
-       spin_unlock_irq(&b->rb_lock);
+       cancel_fake_irq(engine);
 }
 
-static void signaler_set_rtpriority(void)
+bool i915_request_enable_breadcrumb(struct i915_request *rq)
 {
-        struct sched_param param = { .sched_priority = 1 };
-
-        sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
-}
+       struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-static int intel_breadcrumbs_signaler(void *arg)
-{
-       struct intel_engine_cs *engine = arg;
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct i915_request *rq, *n;
+       GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags));
 
-       /* Install ourselves with high priority to reduce signalling latency */
-       signaler_set_rtpriority();
+       if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
+               return true;
 
-       do {
-               bool do_schedule = true;
-               LIST_HEAD(list);
-               u32 seqno;
+       spin_lock(&b->irq_lock);
+       if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags) &&
+           !__request_completed(rq)) {
+               struct intel_context *ce = rq->hw_context;
+               struct list_head *pos;
 
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (list_empty(&b->signals))
-                       goto sleep;
+               __intel_breadcrumbs_arm_irq(b);
 
                /*
-                * We are either woken up by the interrupt bottom-half,
-                * or by a client adding a new signaller. In both cases,
-                * the GPU seqno may have advanced beyond our oldest signal.
-                * If it has, propagate the signal, remove the waiter and
-                * check again with the next oldest signal. Otherwise we
-                * need to wait for a new interrupt from the GPU or for
-                * a new client.
+                * We keep the seqno in retirement order, so we can break
+                * inside intel_engine_breadcrumbs_irq as soon as we've passed
+                * the last completed request (or seen a request that hasn't
+                * even started). We could iterate the timeline->requests list,
+                * but keeping a separate signalers_list has the advantage of
+                * hopefully being much smaller than the full list and so
+                * provides faster iteration and detection when there are no
+                * more interrupts required for this context.
+                *
+                * We typically expect to add new signalers in order, so we
+                * start looking for our insertion point from the tail of
+                * the list.
                 */
-               seqno = intel_engine_get_seqno(engine);
-
-               spin_lock_irq(&b->rb_lock);
-               list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
-                       u32 this = rq->signaling.wait.seqno;
+               list_for_each_prev(pos, &ce->signals) {
+                       struct i915_request *it =
+                               list_entry(pos, typeof(*it), signal_link);
 
-                       GEM_BUG_ON(!rq->signaling.wait.seqno);
-
-                       if (!i915_seqno_passed(seqno, this))
+                       if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
                                break;
-
-                       if (likely(this == i915_request_global_seqno(rq))) {
-                               __intel_engine_remove_wait(engine,
-                                                          &rq->signaling.wait);
-
-                               rq->signaling.wait.seqno = 0;
-                               __list_del_entry(&rq->signaling.link);
-
-                               if (!i915_request_signaled(rq)) {
-                                       list_add_tail(&rq->signaling.link,
-                                                     &list);
-                                       i915_request_get(rq);
-                               }
-                       }
                }
-               spin_unlock_irq(&b->rb_lock);
-
-               if (!list_empty(&list)) {
-                       local_bh_disable();
-                       list_for_each_entry_safe(rq, n, &list, signaling.link) {
-                               dma_fence_signal(&rq->fence);
-                               GEM_BUG_ON(!i915_request_completed(rq));
-                               i915_request_put(rq);
-                       }
-                       local_bh_enable(); /* kick start the tasklets */
-
-                       /*
-                        * If the engine is saturated we may be continually
-                        * processing completed requests. This angers the
-                        * NMI watchdog if we never let anything else
-                        * have access to the CPU. Let's pretend to be nice
-                        * and relinquish the CPU if we burn through the
-                        * entire RT timeslice!
-                        */
-                       do_schedule = need_resched();
-               }
-
-               if (unlikely(do_schedule)) {
-sleep:
-                       if (kthread_should_park())
-                               kthread_parkme();
-
-                       if (unlikely(kthread_should_stop()))
-                               break;
-
-                       schedule();
-               }
-       } while (1);
-       __set_current_state(TASK_RUNNING);
-
-       return 0;
-}
+               list_add(&rq->signal_link, pos);
+               if (pos == &ce->signals) /* catch transitions from empty list */
+                       list_move_tail(&ce->signal_link, &b->signalers);
 
-static void insert_signal(struct intel_breadcrumbs *b,
-                         struct i915_request *request,
-                         const u32 seqno)
-{
-       struct i915_request *iter;
-
-       lockdep_assert_held(&b->rb_lock);
-
-       /*
-        * A reasonable assumption is that we are called to add signals
-        * in sequence, as the requests are submitted for execution and
-        * assigned a global_seqno. This will be the case for the majority
-        * of internally generated signals (inter-engine signaling).
-        *
-        * Out of order waiters triggering random signaling enabling will
-        * be more problematic, but hopefully rare enough and the list
-        * small enough that the O(N) insertion sort is not an issue.
-        */
-
-       list_for_each_entry_reverse(iter, &b->signals, signaling.link)
-               if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
-                       break;
-
-       list_add(&request->signaling.link, &iter->signaling.link);
-}
-
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
-{
-       struct intel_engine_cs *engine = request->engine;
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct intel_wait *wait = &request->signaling.wait;
-       u32 seqno;
-
-       /*
-        * Note that we may be called from an interrupt handler on another
-        * device (e.g. nouveau signaling a fence completion causing us
-        * to submit a request, and so enable signaling). As such,
-        * we need to make sure that all other users of b->rb_lock protect
-        * against interrupts, i.e. use spin_lock_irqsave.
-        */
-
-       /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */
-       GEM_BUG_ON(!irqs_disabled());
-       lockdep_assert_held(&request->lock);
-
-       seqno = i915_request_global_seqno(request);
-       if (!seqno) /* will be enabled later upon execution */
-               return true;
-
-       GEM_BUG_ON(wait->seqno);
-       wait->tsk = b->signaler;
-       wait->request = request;
-       wait->seqno = seqno;
-
-       /*
-        * Add ourselves into the list of waiters, but registering our
-        * bottom-half as the signaller thread. As per usual, only the oldest
-        * waiter (not just signaller) is tasked as the bottom-half waking
-        * up all completed waiters after the user interrupt.
-        *
-        * If we are the oldest waiter, enable the irq (after which we
-        * must double check that the seqno did not complete).
-        */
-       spin_lock(&b->rb_lock);
-       insert_signal(b, request, seqno);
-       wakeup &= __intel_engine_add_wait(engine, wait);
-       spin_unlock(&b->rb_lock);
-
-       if (wakeup) {
-               wake_up_process(b->signaler);
-               return !intel_wait_complete(wait);
+               set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
        }
+       spin_unlock(&b->irq_lock);
 
-       return true;
+       return !__request_completed(rq);
 }
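
To illustrate the ordering rule described in the comment above, here is a minimal, self-contained userspace sketch (not driver code; the list type and names are invented for illustration): signals are kept in retirement order and a new entry is inserted by scanning backwards from the tail, using the same wraparound-safe comparison as i915_seqno_passed().

/*
 * Standalone illustration only, not driver code: keep a list in
 * retirement order and insert new seqnos by scanning back from the
 * tail, with the same wraparound-safe compare as i915_seqno_passed().
 */
#include <stdint.h>
#include <stdio.h>

static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;	/* true if a is at or after b */
}

struct node {
	uint32_t seqno;
	struct node *prev, *next;
};

static void insert_in_retirement_order(struct node *head, struct node *rq)
{
	struct node *pos;

	/* new seqnos usually belong at the tail, so walk backwards */
	for (pos = head->prev; pos != head; pos = pos->prev)
		if (seqno_passed(rq->seqno, pos->seqno))
			break;

	/* link rq immediately after pos, like list_add(&rq->signal_link, pos) */
	rq->prev = pos;
	rq->next = pos->next;
	pos->next->prev = rq;
	pos->next = rq;
}

int main(void)
{
	struct node head = { 0, &head, &head };
	struct node a = { .seqno = 0xfffffffeu };
	struct node b = { .seqno = 2 };
	struct node c = { .seqno = 1 };
	struct node *n;

	insert_in_retirement_order(&head, &a);
	insert_in_retirement_order(&head, &b);
	insert_in_retirement_order(&head, &c);

	for (n = head.next; n != &head; n = n->next)
		printf("%#x\n", (unsigned int)n->seqno);
	/* prints 0xfffffffe, 0x1, 0x2 */
	return 0;
}

With the wraparound-safe compare, seqnos 1 and 2 sort after 0xfffffffe, so the list stays in retirement order across a seqno wrap.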
 
-void intel_engine_cancel_signaling(struct i915_request *request)
+void i915_request_cancel_breadcrumb(struct i915_request *rq)
 {
-       struct intel_engine_cs *engine = request->engine;
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       struct intel_breadcrumbs *b = &rq->engine->breadcrumbs;
 
-       GEM_BUG_ON(!irqs_disabled());
-       lockdep_assert_held(&request->lock);
-
-       if (!READ_ONCE(request->signaling.wait.seqno))
+       if (!test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
                return;
 
-       spin_lock(&b->rb_lock);
-       __intel_engine_remove_wait(engine, &request->signaling.wait);
-       if (fetch_and_zero(&request->signaling.wait.seqno))
-               __list_del_entry(&request->signaling.link);
-       spin_unlock(&b->rb_lock);
-}
-
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct task_struct *tsk;
-
-       spin_lock_init(&b->rb_lock);
-       spin_lock_init(&b->irq_lock);
-
-       timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
-       timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
-
-       INIT_LIST_HEAD(&b->signals);
-
-       /* Spawn a thread to provide a common bottom-half for all signals.
-        * As this is an asynchronous interface we cannot steal the current
-        * task for handling the bottom-half to the user interrupt, therefore
-        * we create a thread to do the coherent seqno dance after the
-        * interrupt and then signal the waitqueue (via the dma-buf/fence).
-        */
-       tsk = kthread_run(intel_breadcrumbs_signaler, engine,
-                         "i915/signal:%d", engine->id);
-       if (IS_ERR(tsk))
-               return PTR_ERR(tsk);
-
-       b->signaler = tsk;
-
-       return 0;
-}
+       spin_lock(&b->irq_lock);
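+       /* recheck under the lock: the breadcrumbs irq may have signaled us */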
+       if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
+               struct intel_context *ce = rq->hw_context;
 
-static void cancel_fake_irq(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+               list_del(&rq->signal_link);
+               if (list_empty(&ce->signals))
+                       list_del_init(&ce->signal_link);
 
-       del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */
-       del_timer_sync(&b->hangcheck);
-       clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+               clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);
+       }
+       spin_unlock(&b->irq_lock);
 }
 
-void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+                                   struct drm_printer *p)
 {
        struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       unsigned long flags;
+       struct intel_context *ce;
+       struct i915_request *rq;
 
-       spin_lock_irqsave(&b->irq_lock, flags);
-
-       /*
-        * Leave the fake_irq timer enabled (if it is running), but clear the
-        * bit so that it turns itself off on its next wake up and goes back
-        * to the long hangcheck interval if still required.
-        */
-       clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
-
-       if (b->irq_enabled)
-               irq_enable(engine);
-       else
-               irq_disable(engine);
-
-       spin_unlock_irqrestore(&b->irq_lock, flags);
-}
-
-void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
+       if (list_empty(&b->signalers))
+               return;
 
-       /* The engines should be idle and all requests accounted for! */
-       WARN_ON(READ_ONCE(b->irq_wait));
-       WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-       WARN_ON(!list_empty(&b->signals));
+       drm_printf(p, "Signals:\n");
 
-       if (!IS_ERR_OR_NULL(b->signaler))
-               kthread_stop(b->signaler);
+       spin_lock_irq(&b->irq_lock);
+       list_for_each_entry(ce, &b->signalers, signal_link) {
+               list_for_each_entry(rq, &ce->signals, signal_link) {
+                       drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
+                                  rq->fence.context, rq->fence.seqno,
+                                  i915_request_completed(rq) ? "!" :
+                                  i915_request_started(rq) ? "*" :
+                                  "",
+                                  jiffies_to_msecs(jiffies - rq->emitted_jiffies));
+               }
+       }
+       spin_unlock_irq(&b->irq_lock);
 
-       cancel_fake_irq(engine);
+       if (test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings))
+               drm_printf(p, "Fake irq active\n");
 }
-
-#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
-#include "selftests/intel_breadcrumbs.c"
-#endif
 
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
        intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-
-       /* After manually advancing the seqno, fake the interrupt in case
-        * there are any waiters for that seqno.
-        */
-       intel_engine_wakeup(engine);
-
        GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno);
 }
 
 
        i915_timeline_set_subclass(&engine->timeline, TIMELINE_ENGINE);
 
+       intel_engine_init_breadcrumbs(engine);
        intel_engine_init_execlist(engine);
        intel_engine_init_hangcheck(engine);
        intel_engine_init_batch_pool(engine);
                }
        }
 
-       ret = intel_engine_init_breadcrumbs(engine);
-       if (ret)
-               goto err_unpin_preempt;
-
        ret = measure_breadcrumb_dw(engine);
        if (ret < 0)
-               goto err_breadcrumbs;
+               goto err_unpin_preempt;
 
        engine->emit_fini_breadcrumb_dw = ret;
 
        return 0;
 
-err_breadcrumbs:
-       intel_engine_fini_breadcrumbs(engine);
 err_unpin_preempt:
        if (i915->preempt_context)
                __intel_context_unpin(i915->preempt_context, engine);
 
        x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));
 
-       drm_printf(m, "%s%x%s [%llx:%llx]%s @ %dms: %s\n",
+       drm_printf(m, "%s%x%s%s [%llx:%llx]%s @ %dms: %s\n",
                   prefix,
                   rq->global_seqno,
                   i915_request_completed(rq) ? "!" :
                   i915_request_started(rq) ? "*" :
                   "",
+                  test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
+                           &rq->fence.flags) ?  "+" : "",
                   rq->fence.context, rq->fence.seqno,
                   buf,
                   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
                       struct drm_printer *m,
                       const char *header, ...)
 {
-       struct intel_breadcrumbs * const b = &engine->breadcrumbs;
        struct i915_gpu_error * const error = &engine->i915->gpu_error;
        struct i915_request *rq;
        intel_wakeref_t wakeref;
-       unsigned long flags;
-       struct rb_node *rb;
 
        if (header) {
                va_list ap;
 
        intel_execlists_show_requests(engine, m, print_request, 8);
 
-       spin_lock_irqsave(&b->rb_lock, flags);
-       for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-               struct intel_wait *w = rb_entry(rb, typeof(*w), node);
-
-               drm_printf(m, "\t%s [%d:%c] waiting for %x\n",
-                          w->tsk->comm, w->tsk->pid,
-                          task_state_to_char(w->tsk),
-                          w->seqno);
-       }
-       spin_unlock_irqrestore(&b->rb_lock, flags);
-
        drm_printf(m, "HWSP:\n");
        hexdump(m, engine->status_page.addr, PAGE_SIZE);
 
        drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
+
+       intel_engine_print_breadcrumbs(engine, m);
 }
 
 static u8 user_class_map[] = {
 
        }
 
        /* Papering over lost _interrupts_ immediately following the restart */
-       intel_engine_wakeup(engine);
+       intel_engine_queue_breadcrumbs(engine);
 out:
        intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
 
 #include <drm/drm_util.h>
 
 #include <linux/hashtable.h>
+#include <linux/irq_work.h>
 #include <linux/seqlock.h>
 
 #include "i915_gem_batch_pool.h"
         * the overhead of waking that client is much preferred.
         */
        struct intel_breadcrumbs {
-               spinlock_t irq_lock; /* protects irq_*; irqsafe */
-               struct intel_wait *irq_wait; /* oldest waiter by retirement */
+               spinlock_t irq_lock;
+               struct list_head signalers;
 
-               spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
-               struct rb_root waiters; /* sorted by retirement, priority */
-               struct list_head signals; /* sorted by retirement */
-               struct task_struct *signaler; /* used for fence signalling */
+               struct irq_work irq_work; /* for use from inside irq_lock */
 
                struct timer_list fake_irq; /* used after a missed interrupt */
                struct timer_list hangcheck; /* detect missed interrupts */
 
                unsigned int hangcheck_interrupts;
                unsigned int irq_enabled;
-               unsigned int irq_count;
 
-               bool irq_armed : 1;
+               bool irq_armed;
+               bool irq_fired;
        } breadcrumbs;
 
        struct {
 void intel_engine_get_instdone(struct intel_engine_cs *engine,
                               struct intel_instdone *instdone);
 
-/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
-int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
-
-static inline void intel_wait_init(struct intel_wait *wait)
-{
-       wait->tsk = current;
-       wait->request = NULL;
-}
-
-static inline void intel_wait_init_for_seqno(struct intel_wait *wait, u32 seqno)
-{
-       wait->tsk = current;
-       wait->seqno = seqno;
-}
-
-static inline bool intel_wait_has_seqno(const struct intel_wait *wait)
-{
-       return wait->seqno;
-}
-
-static inline bool
-intel_wait_update_seqno(struct intel_wait *wait, u32 seqno)
-{
-       wait->seqno = seqno;
-       return intel_wait_has_seqno(wait);
-}
-
-static inline bool
-intel_wait_update_request(struct intel_wait *wait,
-                         const struct i915_request *rq)
-{
-       return intel_wait_update_seqno(wait, i915_request_global_seqno(rq));
-}
-
-static inline bool
-intel_wait_check_seqno(const struct intel_wait *wait, u32 seqno)
-{
-       return wait->seqno == seqno;
-}
-
-static inline bool
-intel_wait_check_request(const struct intel_wait *wait,
-                        const struct i915_request *rq)
-{
-       return intel_wait_check_seqno(wait, i915_request_global_seqno(rq));
-}
+void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline bool intel_wait_complete(const struct intel_wait *wait)
-{
-       return RB_EMPTY_NODE(&wait->node);
-}
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
 
-bool intel_engine_add_wait(struct intel_engine_cs *engine,
-                          struct intel_wait *wait);
-void intel_engine_remove_wait(struct intel_engine_cs *engine,
-                             struct intel_wait *wait);
-bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup);
-void intel_engine_cancel_signaling(struct i915_request *request);
+bool intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine);
+void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
-static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine)
+static inline void
+intel_engine_queue_breadcrumbs(struct intel_engine_cs *engine)
 {
-       return READ_ONCE(engine->breadcrumbs.irq_wait);
+       irq_work_queue(&engine->breadcrumbs.irq_work);
 }
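
The helper above hands signaling off to an irq_work rather than doing the work directly in the interrupt handler. A hypothetical, stripped-down sketch of that deferral pattern follows (the my_* names are invented for illustration; init_irq_work() and irq_work_queue() are the actual kernel APIs being relied on):

#include <linux/irq_work.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>

struct my_breadcrumbs {
	spinlock_t lock;	/* protects the signal lists; irqsafe */
	struct irq_work work;	/* runs in irq context soon after queuing */
};

static void my_signal_worker(struct irq_work *work)
{
	struct my_breadcrumbs *b = container_of(work, typeof(*b), work);

	spin_lock(&b->lock);
	/*
	 * Walk the per-context signal lists here, dma_fence_signal()
	 * anything that has completed and disarm the irq when empty.
	 */
	spin_unlock(&b->lock);
}

static void my_breadcrumbs_init(struct my_breadcrumbs *b)
{
	spin_lock_init(&b->lock);
	init_irq_work(&b->work, my_signal_worker);
}

/* called from the user interrupt handler: cheap and never blocks */
static void my_breadcrumbs_kick(struct my_breadcrumbs *b)
{
	irq_work_queue(&b->work);
}

irq_work_queue() may be called from hard-irq context and coalesces a kick that is already pending, which is why the CS interrupt handler can call intel_engine_queue_breadcrumbs() unconditionally.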
 
-unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
-#define ENGINE_WAKEUP_WAITER BIT(0)
-#define ENGINE_WAKEUP_ASLEEP BIT(1)
-
-void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
-void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
-
-void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
-void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
+bool intel_engine_breadcrumbs_irq(struct intel_engine_cs *engine);
 
 void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
 
+void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
+                                   struct drm_printer *p);
+
 static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 {
        memset(batch, 0, 6 * sizeof(u32));
 
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
-selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
 selftest(objects, i915_gem_object_mock_selftests)
 
 #include <linux/prime_numbers.h>
 
 #include "../i915_selftest.h"
+#include "i915_random.h"
 #include "igt_live_test.h"
+#include "lib_sw_fence.h"
 
 #include "mock_context.h"
+#include "mock_drm.h"
 #include "mock_gem_device.h"
 
 static int igt_add_request(void *arg)
        return err;
 }
 
+struct smoketest {
+       struct intel_engine_cs *engine;
+       struct i915_gem_context **contexts;
+       atomic_long_t num_waits, num_fences;
+       int ncontexts, max_batch;
+       struct i915_request *(*request_alloc)(struct i915_gem_context *,
+                                             struct intel_engine_cs *);
+};
+
+static struct i915_request *
+__mock_request_alloc(struct i915_gem_context *ctx,
+                    struct intel_engine_cs *engine)
+{
+       return mock_request(engine, ctx, 0);
+}
+
+static struct i915_request *
+__live_request_alloc(struct i915_gem_context *ctx,
+                    struct intel_engine_cs *engine)
+{
+       return i915_request_alloc(engine, ctx);
+}
+
+static int __igt_breadcrumbs_smoketest(void *arg)
+{
+       struct smoketest *t = arg;
+       struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
+       const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
+       const unsigned int total = 4 * t->ncontexts + 1;
+       unsigned int num_waits = 0, num_fences = 0;
+       struct i915_request **requests;
+       I915_RND_STATE(prng);
+       unsigned int *order;
+       int err = 0;
+
+       /*
+        * A very simple test to catch the most egregious of list handling bugs.
+        *
+        * At its heart, we simply create oodles of requests running across
+        * multiple kthreads and enable signaling on them, for the sole purpose
+        * of stressing our breadcrumb handling. The only inspection we do is
+        * that the fences were marked as signaled.
+        */
+
+       requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
+       if (!requests)
+               return -ENOMEM;
+
+       order = i915_random_order(total, &prng);
+       if (!order) {
+               err = -ENOMEM;
+               goto out_requests;
+       }
+
+       while (!kthread_should_stop()) {
+               struct i915_sw_fence *submit, *wait;
+               unsigned int n, count;
+
+               submit = heap_fence_create(GFP_KERNEL);
+               if (!submit) {
+                       err = -ENOMEM;
+                       break;
+               }
+
+               wait = heap_fence_create(GFP_KERNEL);
+               if (!wait) {
+                       i915_sw_fence_commit(submit);
+                       heap_fence_put(submit);
+                       err = -ENOMEM;
+                       break;
+               }
+
+               i915_random_reorder(order, total, &prng);
+               count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
+
+               for (n = 0; n < count; n++) {
+                       struct i915_gem_context *ctx =
+                               t->contexts[order[n] % t->ncontexts];
+                       struct i915_request *rq;
+
+                       mutex_lock(BKL);
+
+                       rq = t->request_alloc(ctx, t->engine);
+                       if (IS_ERR(rq)) {
+                               mutex_unlock(BKL);
+                               err = PTR_ERR(rq);
+                               count = n;
+                               break;
+                       }
+
+                       err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+                                                              submit,
+                                                              GFP_KERNEL);
+
+                       requests[n] = i915_request_get(rq);
+                       i915_request_add(rq);
+
+                       mutex_unlock(BKL);
+
+                       if (err >= 0)
+                               err = i915_sw_fence_await_dma_fence(wait,
+                                                                   &rq->fence,
+                                                                   0,
+                                                                   GFP_KERNEL);
+
+                       if (err < 0) {
+                               i915_request_put(rq);
+                               count = n;
+                               break;
+                       }
+               }
+
+               i915_sw_fence_commit(submit);
+               i915_sw_fence_commit(wait);
+
+               if (!wait_event_timeout(wait->wait,
+                                       i915_sw_fence_done(wait),
+                                       HZ / 2)) {
+                       struct i915_request *rq = requests[count - 1];
+
+                       pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
+                              count,
+                              rq->fence.context, rq->fence.seqno,
+                              t->engine->name);
+                       i915_gem_set_wedged(t->engine->i915);
+                       GEM_BUG_ON(!i915_request_completed(rq));
+                       i915_sw_fence_wait(wait);
+                       err = -EIO;
+               }
+
+               for (n = 0; n < count; n++) {
+                       struct i915_request *rq = requests[n];
+
+                       if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+                                     &rq->fence.flags)) {
+                               pr_err("%llu:%llu was not signaled!\n",
+                                      rq->fence.context, rq->fence.seqno);
+                               err = -EINVAL;
+                       }
+
+                       i915_request_put(rq);
+               }
+
+               heap_fence_put(wait);
+               heap_fence_put(submit);
+
+               if (err < 0)
+                       break;
+
+               num_fences += count;
+               num_waits++;
+
+               cond_resched();
+       }
+
+       atomic_long_add(num_fences, &t->num_fences);
+       atomic_long_add(num_waits, &t->num_waits);
+
+       kfree(order);
+out_requests:
+       kfree(requests);
+       return err;
+}
+
+static int mock_breadcrumbs_smoketest(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct smoketest t = {
+               .engine = i915->engine[RCS],
+               .ncontexts = 1024,
+               .max_batch = 1024,
+               .request_alloc = __mock_request_alloc
+       };
+       unsigned int ncpus = num_online_cpus();
+       struct task_struct **threads;
+       unsigned int n;
+       int ret = 0;
+
+       /*
+        * Smoketest our breadcrumb/signal handling for requests across multiple
+        * threads. A very simple test to only catch the most egregious of bugs.
+        * See __igt_breadcrumbs_smoketest();
+        */
+
+       threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
+       if (!threads)
+               return -ENOMEM;
+
+       t.contexts =
+               kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
+       if (!t.contexts) {
+               ret = -ENOMEM;
+               goto out_threads;
+       }
+
+       mutex_lock(&t.engine->i915->drm.struct_mutex);
+       for (n = 0; n < t.ncontexts; n++) {
+               t.contexts[n] = mock_context(t.engine->i915, "mock");
+               if (!t.contexts[n]) {
+                       ret = -ENOMEM;
+                       goto out_contexts;
+               }
+       }
+       mutex_unlock(&t.engine->i915->drm.struct_mutex);
+
+       for (n = 0; n < ncpus; n++) {
+               threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
+                                        &t, "igt/%d", n);
+               if (IS_ERR(threads[n])) {
+                       ret = PTR_ERR(threads[n]);
+                       ncpus = n;
+                       break;
+               }
+
+               get_task_struct(threads[n]);
+       }
+
+       msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+       for (n = 0; n < ncpus; n++) {
+               int err;
+
+               err = kthread_stop(threads[n]);
+               if (err < 0 && !ret)
+                       ret = err;
+
+               put_task_struct(threads[n]);
+       }
+       pr_info("Completed %lu waits for %lu fences across %d cpus\n",
+               atomic_long_read(&t.num_waits),
+               atomic_long_read(&t.num_fences),
+               ncpus);
+
+       mutex_lock(&t.engine->i915->drm.struct_mutex);
+out_contexts:
+       for (n = 0; n < t.ncontexts; n++) {
+               if (!t.contexts[n])
+                       break;
+               mock_context_close(t.contexts[n]);
+       }
+       mutex_unlock(&t.engine->i915->drm.struct_mutex);
+       kfree(t.contexts);
+out_threads:
+       kfree(threads);
+
+       return ret;
+}
+
 int i915_request_mock_selftests(void)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(igt_wait_request),
                SUBTEST(igt_fence_wait),
                SUBTEST(igt_request_rewind),
+               SUBTEST(mock_breadcrumbs_smoketest),
        };
        struct drm_i915_private *i915;
        intel_wakeref_t wakeref;
        return err;
 }
 
+static int
+max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+       struct i915_request *rq;
+       int ret;
+
+       /*
+        * Before execlists, all contexts share the same ringbuffer. With
+        * execlists, each context/engine has a separate ringbuffer which,
+        * for the purposes of this test, is effectively inexhaustible.
+        *
+        * For the global ringbuffer though, we have to be very careful
+        * that we do not wrap while preventing the execution of requests
+        * with an unsignaled fence.
+        */
+       if (HAS_EXECLISTS(ctx->i915))
+               return INT_MAX;
+
+       rq = i915_request_alloc(engine, ctx);
+       if (IS_ERR(rq)) {
+               ret = PTR_ERR(rq);
+       } else {
+               int sz;
+
+               ret = rq->ring->size - rq->reserved_space;
+               i915_request_add(rq);
+
+               sz = rq->ring->emit - rq->head;
+               if (sz < 0)
+                       sz += rq->ring->size;
+               ret /= sz;
+               ret /= 2; /* leave half spare, in case of emergency! */
+       }
+
+       return ret;
+}
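
As a rough worked example with purely illustrative numbers (not measured values): a 16 KiB legacy ring with 160 bytes of reserved space and roughly 256 bytes emitted per request gives (16384 - 160) / 256 = 63 requests, which the final division halves to 31 as the emergency margin; live_breadcrumbs_smoketest() then divides that again by the number of CPUs plus one before handing the budget to its threads.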
+
+static int live_breadcrumbs_smoketest(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct smoketest t[I915_NUM_ENGINES];
+       unsigned int ncpus = num_online_cpus();
+       unsigned long num_waits, num_fences;
+       struct intel_engine_cs *engine;
+       struct task_struct **threads;
+       struct igt_live_test live;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       struct drm_file *file;
+       unsigned int n;
+       int ret = 0;
+
+       /*
+        * Smoketest our breadcrumb/signal handling for requests across multiple
+        * threads. A very simple test to only catch the most egregious of bugs.
+        * See __igt_breadcrumbs_smoketest();
+        *
+        * On real hardware this time.
+        */
+
+       wakeref = intel_runtime_pm_get(i915);
+
+       file = mock_file(i915);
+       if (IS_ERR(file)) {
+               ret = PTR_ERR(file);
+               goto out_rpm;
+       }
+
+       threads = kcalloc(ncpus * I915_NUM_ENGINES,
+                         sizeof(*threads),
+                         GFP_KERNEL);
+       if (!threads) {
+               ret = -ENOMEM;
+               goto out_file;
+       }
+
+       memset(&t[0], 0, sizeof(t[0]));
+       t[0].request_alloc = __live_request_alloc;
+       t[0].ncontexts = 64;
+       t[0].contexts = kmalloc_array(t[0].ncontexts,
+                                     sizeof(*t[0].contexts),
+                                     GFP_KERNEL);
+       if (!t[0].contexts) {
+               ret = -ENOMEM;
+               goto out_threads;
+       }
+
+       mutex_lock(&i915->drm.struct_mutex);
+       for (n = 0; n < t[0].ncontexts; n++) {
+               t[0].contexts[n] = live_context(i915, file);
+               if (IS_ERR(t[0].contexts[n])) {
+                       ret = PTR_ERR(t[0].contexts[n]);
+                       goto out_contexts;
+               }
+       }
+
+       ret = igt_live_test_begin(&live, i915, __func__, "");
+       if (ret)
+               goto out_contexts;
+
+       for_each_engine(engine, i915, id) {
+               t[id] = t[0];
+               t[id].engine = engine;
+               t[id].max_batch = max_batches(t[0].contexts[0], engine);
+               if (t[id].max_batch < 0) {
+                       ret = t[id].max_batch;
+                       mutex_unlock(&i915->drm.struct_mutex);
+                       goto out_flush;
+               }
+               /* One ring interleaved between requests from all cpus */
+               t[id].max_batch /= num_online_cpus() + 1;
+               pr_debug("Limiting batches to %d requests on %s\n",
+                        t[id].max_batch, engine->name);
+
+               for (n = 0; n < ncpus; n++) {
+                       struct task_struct *tsk;
+
+                       tsk = kthread_run(__igt_breadcrumbs_smoketest,
+                                         &t[id], "igt/%d.%d", id, n);
+                       if (IS_ERR(tsk)) {
+                               ret = PTR_ERR(tsk);
+                               mutex_unlock(&i915->drm.struct_mutex);
+                               goto out_flush;
+                       }
+
+                       get_task_struct(tsk);
+                       threads[id * ncpus + n] = tsk;
+               }
+       }
+       mutex_unlock(&i915->drm.struct_mutex);
+
+       msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
+
+out_flush:
+       num_waits = 0;
+       num_fences = 0;
+       for_each_engine(engine, i915, id) {
+               for (n = 0; n < ncpus; n++) {
+                       struct task_struct *tsk = threads[id * ncpus + n];
+                       int err;
+
+                       if (!tsk)
+                               continue;
+
+                       err = kthread_stop(tsk);
+                       if (err < 0 && !ret)
+                               ret = err;
+
+                       put_task_struct(tsk);
+               }
+
+               num_waits += atomic_long_read(&t[id].num_waits);
+               num_fences += atomic_long_read(&t[id].num_fences);
+       }
+       pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
+               num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
+
+       mutex_lock(&i915->drm.struct_mutex);
+       ret = igt_live_test_end(&live) ?: ret;
+out_contexts:
+       mutex_unlock(&i915->drm.struct_mutex);
+       kfree(t[0].contexts);
+out_threads:
+       kfree(threads);
+out_file:
+       mock_file_free(i915, file);
+out_rpm:
+       intel_runtime_pm_put(i915, wakeref);
+
+       return ret;
+}
+
 int i915_request_live_selftests(struct drm_i915_private *i915)
 {
        static const struct i915_subtest tests[] = {
                SUBTEST(live_all_engines),
                SUBTEST(live_sequential_engines),
                SUBTEST(live_empty_request),
+               SUBTEST(live_breadcrumbs_smoketest),
        };
 
        if (i915_terminally_wedged(&i915->gpu_error))
 
 
 bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq)
 {
-       if (!wait_event_timeout(rq->execute,
-                               READ_ONCE(rq->global_seqno),
-                               msecs_to_jiffies(10)))
-               return false;
-
        return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq),
                                               rq->fence.seqno),
                             10) &&
 
+++ /dev/null
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "../i915_selftest.h"
-#include "i915_random.h"
-
-#include "mock_gem_device.h"
-#include "mock_engine.h"
-
-static int check_rbtree(struct intel_engine_cs *engine,
-                       const unsigned long *bitmap,
-                       const struct intel_wait *waiters,
-                       const int count)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-       struct rb_node *rb;
-       int n;
-
-       if (&b->irq_wait->node != rb_first(&b->waiters)) {
-               pr_err("First waiter does not match first element of wait-tree\n");
-               return -EINVAL;
-       }
-
-       n = find_first_bit(bitmap, count);
-       for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
-               struct intel_wait *w = container_of(rb, typeof(*w), node);
-               int idx = w - waiters;
-
-               if (!test_bit(idx, bitmap)) {
-                       pr_err("waiter[%d, seqno=%d] removed but still in wait-tree\n",
-                              idx, w->seqno);
-                       return -EINVAL;
-               }
-
-               if (n != idx) {
-                       pr_err("waiter[%d, seqno=%d] does not match expected next element in tree [%d]\n",
-                              idx, w->seqno, n);
-                       return -EINVAL;
-               }
-
-               n = find_next_bit(bitmap, count, n + 1);
-       }
-
-       return 0;
-}
-
-static int check_completion(struct intel_engine_cs *engine,
-                           const unsigned long *bitmap,
-                           const struct intel_wait *waiters,
-                           const int count)
-{
-       int n;
-
-       for (n = 0; n < count; n++) {
-               if (intel_wait_complete(&waiters[n]) != !!test_bit(n, bitmap))
-                       continue;
-
-               pr_err("waiter[%d, seqno=%d] is %s, but expected %s\n",
-                      n, waiters[n].seqno,
-                      intel_wait_complete(&waiters[n]) ? "complete" : "active",
-                      test_bit(n, bitmap) ? "active" : "complete");
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int check_rbtree_empty(struct intel_engine_cs *engine)
-{
-       struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-       if (b->irq_wait) {
-               pr_err("Empty breadcrumbs still has a waiter\n");
-               return -EINVAL;
-       }
-
-       if (!RB_EMPTY_ROOT(&b->waiters)) {
-               pr_err("Empty breadcrumbs, but wait-tree not empty\n");
-               return -EINVAL;
-       }
-
-       return 0;
-}
-
-static int igt_random_insert_remove(void *arg)
-{
-       const u32 seqno_bias = 0x1000;
-       I915_RND_STATE(prng);
-       struct intel_engine_cs *engine = arg;
-       struct intel_wait *waiters;
-       const int count = 4096;
-       unsigned int *order;
-       unsigned long *bitmap;
-       int err = -ENOMEM;
-       int n;
-
-       mock_engine_reset(engine);
-
-       waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-       if (!waiters)
-               goto out_engines;
-
-       bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
-                        GFP_KERNEL);
-       if (!bitmap)
-               goto out_waiters;
-
-       order = i915_random_order(count, &prng);
-       if (!order)
-               goto out_bitmap;
-
-       for (n = 0; n < count; n++)
-               intel_wait_init_for_seqno(&waiters[n], seqno_bias + n);
-
-       err = check_rbtree(engine, bitmap, waiters, count);
-       if (err)
-               goto out_order;
-
-       /* Add and remove waiters into the rbtree in random order. At each
-        * step, we verify that the rbtree is correctly ordered.
-        */
-       for (n = 0; n < count; n++) {
-               int i = order[n];
-
-               intel_engine_add_wait(engine, &waiters[i]);
-               __set_bit(i, bitmap);
-
-               err = check_rbtree(engine, bitmap, waiters, count);
-               if (err)
-                       goto out_order;
-       }
-
-       i915_random_reorder(order, count, &prng);
-       for (n = 0; n < count; n++) {
-               int i = order[n];
-
-               intel_engine_remove_wait(engine, &waiters[i]);
-               __clear_bit(i, bitmap);
-
-               err = check_rbtree(engine, bitmap, waiters, count);
-               if (err)
-                       goto out_order;
-       }
-
-       err = check_rbtree_empty(engine);
-out_order:
-       kfree(order);
-out_bitmap:
-       kfree(bitmap);
-out_waiters:
-       kvfree(waiters);
-out_engines:
-       mock_engine_flush(engine);
-       return err;
-}
-
-static int igt_insert_complete(void *arg)
-{
-       const u32 seqno_bias = 0x1000;
-       struct intel_engine_cs *engine = arg;
-       struct intel_wait *waiters;
-       const int count = 4096;
-       unsigned long *bitmap;
-       int err = -ENOMEM;
-       int n, m;
-
-       mock_engine_reset(engine);
-
-       waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-       if (!waiters)
-               goto out_engines;
-
-       bitmap = kcalloc(DIV_ROUND_UP(count, BITS_PER_LONG), sizeof(*bitmap),
-                        GFP_KERNEL);
-       if (!bitmap)
-               goto out_waiters;
-
-       for (n = 0; n < count; n++) {
-               intel_wait_init_for_seqno(&waiters[n], n + seqno_bias);
-               intel_engine_add_wait(engine, &waiters[n]);
-               __set_bit(n, bitmap);
-       }
-       err = check_rbtree(engine, bitmap, waiters, count);
-       if (err)
-               goto out_bitmap;
-
-       /* On each step, we advance the seqno so that several waiters are then
-        * complete (we increase the seqno by increasingly larger values to
-        * retire more and more waiters at once). All retired waiters should
-        * be woken and removed from the rbtree, and so that we check.
-        */
-       for (n = 0; n < count; n = m) {
-               int seqno = 2 * n;
-
-               GEM_BUG_ON(find_first_bit(bitmap, count) != n);
-
-               if (intel_wait_complete(&waiters[n])) {
-                       pr_err("waiter[%d, seqno=%d] completed too early\n",
-                              n, waiters[n].seqno);
-                       err = -EINVAL;
-                       goto out_bitmap;
-               }
-
-               /* complete the following waiters */
-               mock_seqno_advance(engine, seqno + seqno_bias);
-               for (m = n; m <= seqno; m++) {
-                       if (m == count)
-                               break;
-
-                       GEM_BUG_ON(!test_bit(m, bitmap));
-                       __clear_bit(m, bitmap);
-               }
-
-               intel_engine_remove_wait(engine, &waiters[n]);
-               RB_CLEAR_NODE(&waiters[n].node);
-
-               err = check_rbtree(engine, bitmap, waiters, count);
-               if (err) {
-                       pr_err("rbtree corrupt after seqno advance to %d\n",
-                              seqno + seqno_bias);
-                       goto out_bitmap;
-               }
-
-               err = check_completion(engine, bitmap, waiters, count);
-               if (err) {
-                       pr_err("completions after seqno advance to %d failed\n",
-                              seqno + seqno_bias);
-                       goto out_bitmap;
-               }
-       }
-
-       err = check_rbtree_empty(engine);
-out_bitmap:
-       kfree(bitmap);
-out_waiters:
-       kvfree(waiters);
-out_engines:
-       mock_engine_flush(engine);
-       return err;
-}
-
-struct igt_wakeup {
-       struct task_struct *tsk;
-       atomic_t *ready, *set, *done;
-       struct intel_engine_cs *engine;
-       unsigned long flags;
-#define STOP 0
-#define IDLE 1
-       wait_queue_head_t *wq;
-       u32 seqno;
-};
-
-static bool wait_for_ready(struct igt_wakeup *w)
-{
-       DEFINE_WAIT(ready);
-
-       set_bit(IDLE, &w->flags);
-       if (atomic_dec_and_test(w->done))
-               wake_up_var(w->done);
-
-       if (test_bit(STOP, &w->flags))
-               goto out;
-
-       for (;;) {
-               prepare_to_wait(w->wq, &ready, TASK_INTERRUPTIBLE);
-               if (atomic_read(w->ready) == 0)
-                       break;
-
-               schedule();
-       }
-       finish_wait(w->wq, &ready);
-
-out:
-       clear_bit(IDLE, &w->flags);
-       if (atomic_dec_and_test(w->set))
-               wake_up_var(w->set);
-
-       return !test_bit(STOP, &w->flags);
-}
-
-static int igt_wakeup_thread(void *arg)
-{
-       struct igt_wakeup *w = arg;
-       struct intel_wait wait;
-
-       while (wait_for_ready(w)) {
-               GEM_BUG_ON(kthread_should_stop());
-
-               intel_wait_init_for_seqno(&wait, w->seqno);
-               intel_engine_add_wait(w->engine, &wait);
-               for (;;) {
-                       set_current_state(TASK_UNINTERRUPTIBLE);
-                       if (i915_seqno_passed(intel_engine_get_seqno(w->engine),
-                                             w->seqno))
-                               break;
-
-                       if (test_bit(STOP, &w->flags)) /* emergency escape */
-                               break;
-
-                       schedule();
-               }
-               intel_engine_remove_wait(w->engine, &wait);
-               __set_current_state(TASK_RUNNING);
-       }
-
-       return 0;
-}
-
-static void igt_wake_all_sync(atomic_t *ready,
-                             atomic_t *set,
-                             atomic_t *done,
-                             wait_queue_head_t *wq,
-                             int count)
-{
-       atomic_set(set, count);
-       atomic_set(ready, 0);
-       wake_up_all(wq);
-
-       wait_var_event(set, !atomic_read(set));
-       atomic_set(ready, count);
-       atomic_set(done, count);
-}
-
-static int igt_wakeup(void *arg)
-{
-       I915_RND_STATE(prng);
-       struct intel_engine_cs *engine = arg;
-       struct igt_wakeup *waiters;
-       DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-       const int count = 4096;
-       const u32 max_seqno = count / 4;
-       atomic_t ready, set, done;
-       int err = -ENOMEM;
-       int n, step;
-
-       mock_engine_reset(engine);
-
-       waiters = kvmalloc_array(count, sizeof(*waiters), GFP_KERNEL);
-       if (!waiters)
-               goto out_engines;
-
-       /* Create a large number of threads, each waiting on a random seqno.
-        * Multiple waiters will be waiting for the same seqno.
-        */
-       atomic_set(&ready, count);
-       for (n = 0; n < count; n++) {
-               waiters[n].wq = &wq;
-               waiters[n].ready = &ready;
-               waiters[n].set = &set;
-               waiters[n].done = &done;
-               waiters[n].engine = engine;
-               waiters[n].flags = BIT(IDLE);
-
-               waiters[n].tsk = kthread_run(igt_wakeup_thread, &waiters[n],
-                                            "i915/igt:%d", n);
-               if (IS_ERR(waiters[n].tsk))
-                       goto out_waiters;
-
-               get_task_struct(waiters[n].tsk);
-       }
-
-       for (step = 1; step <= max_seqno; step <<= 1) {
-               u32 seqno;
-
-               /* The waiter threads start paused as we assign them a random
-                * seqno and reset the engine. Once the engine is reset,
-                * we signal that the threads may begin their wait upon their
-                * seqno.
-                */
-               for (n = 0; n < count; n++) {
-                       GEM_BUG_ON(!test_bit(IDLE, &waiters[n].flags));
-                       waiters[n].seqno =
-                               1 + prandom_u32_state(&prng) % max_seqno;
-               }
-               mock_seqno_advance(engine, 0);
-               igt_wake_all_sync(&ready, &set, &done, &wq, count);
-
-               /* Simulate the GPU doing chunks of work, with one or more
-                * seqno appearing to finish at the same time. A random number
-                * of threads will be waiting upon the update and hopefully be
-                * woken.
-                */
-               for (seqno = 1; seqno <= max_seqno + step; seqno += step) {
-                       usleep_range(50, 500);
-                       mock_seqno_advance(engine, seqno);
-               }
-               GEM_BUG_ON(intel_engine_get_seqno(engine) < 1 + max_seqno);
-
-               /* With the seqno now beyond any of the waiting threads, they
-                * should all be woken, see that they are complete and signal
-                * that they are ready for the next test. We wait until all
-                * threads are complete and waiting for us (i.e. not a seqno).
-                */
-               if (!wait_var_event_timeout(&done,
-                                           !atomic_read(&done), 10 * HZ)) {
-                       pr_err("Timed out waiting for %d remaining waiters\n",
-                              atomic_read(&done));
-                       err = -ETIMEDOUT;
-                       break;
-               }
-
-               err = check_rbtree_empty(engine);
-               if (err)
-                       break;
-       }
-
-out_waiters:
-       for (n = 0; n < count; n++) {
-               if (IS_ERR(waiters[n].tsk))
-                       break;
-
-               set_bit(STOP, &waiters[n].flags);
-       }
-       mock_seqno_advance(engine, INT_MAX); /* wakeup any broken waiters */
-       igt_wake_all_sync(&ready, &set, &done, &wq, n);
-
-       for (n = 0; n < count; n++) {
-               if (IS_ERR(waiters[n].tsk))
-                       break;
-
-               kthread_stop(waiters[n].tsk);
-               put_task_struct(waiters[n].tsk);
-       }
-
-       kvfree(waiters);
-out_engines:
-       mock_engine_flush(engine);
-       return err;
-}
-
-int intel_breadcrumbs_mock_selftests(void)
-{
-       static const struct i915_subtest tests[] = {
-               SUBTEST(igt_random_insert_remove),
-               SUBTEST(igt_insert_complete),
-               SUBTEST(igt_wakeup),
-       };
-       struct drm_i915_private *i915;
-       int err;
-
-       i915 = mock_gem_device();
-       if (!i915)
-               return -ENOMEM;
-
-       err = i915_subtests(tests, i915->engine[RCS]);
-       drm_dev_put(&i915->drm);
-
-       return err;
-}
 
 
        wait_for_completion(&arg.completion);
 
-       if (wait_for(waitqueue_active(&rq->execute), 10)) {
+       if (wait_for(!list_empty(&rq->fence.cb_list), 10)) {
                struct drm_printer p = drm_info_printer(i915->drm.dev);
 
                pr_err("igt/evict_vma kthread did not wait\n");
 
        destroy_timer_on_stack(&tf->timer);
        i915_sw_fence_fini(&tf->fence);
 }
+
+struct heap_fence {
+       struct i915_sw_fence fence;
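+       /* the kref and the rcu_head are never live at the same time */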
+       union {
+               struct kref ref;
+               struct rcu_head rcu;
+       };
+};
+
+static int __i915_sw_fence_call
+heap_fence_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+       struct heap_fence *h = container_of(fence, typeof(*h), fence);
+
+       switch (state) {
+       case FENCE_COMPLETE:
+               break;
+
+       case FENCE_FREE:
+               heap_fence_put(&h->fence);
+       }
+
+       return NOTIFY_DONE;
+}
+
+struct i915_sw_fence *heap_fence_create(gfp_t gfp)
+{
+       struct heap_fence *h;
+
+       h = kmalloc(sizeof(*h), gfp);
+       if (!h)
+               return NULL;
+
+       i915_sw_fence_init(&h->fence, heap_fence_notify);
+       refcount_set(&h->ref.refcount, 2);
+
+       return &h->fence;
+}
+
+static void heap_fence_release(struct kref *ref)
+{
+       struct heap_fence *h = container_of(ref, typeof(*h), ref);
+
+       i915_sw_fence_fini(&h->fence);
+
+       kfree_rcu(h, rcu);
+}
+
+void heap_fence_put(struct i915_sw_fence *fence)
+{
+       struct heap_fence *h = container_of(fence, typeof(*h), fence);
+
+       kref_put(&h->ref, heap_fence_release);
+}
 
 void timed_fence_init(struct timed_fence *tf, unsigned long expires);
 void timed_fence_fini(struct timed_fence *tf);
 
+struct i915_sw_fence *heap_fence_create(gfp_t gfp);
+void heap_fence_put(struct i915_sw_fence *fence);
+
 #endif /* _LIB_SW_FENCE_H_ */
 
 static void advance(struct mock_request *request)
 {
        list_del_init(&request->link);
-       mock_seqno_advance(request->base.engine, request->base.global_seqno);
+       intel_engine_write_global_seqno(request->base.engine,
+                                       request->base.global_seqno);
        i915_request_mark_complete(&request->base);
        GEM_BUG_ON(!i915_request_completed(&request->base));
+
+       intel_engine_queue_breadcrumbs(request->base.engine);
 }
 
 static void hw_delay_complete(struct timer_list *t)
 {
        struct mock_engine *engine = from_timer(engine, t, hw_delay);
        struct mock_request *request;
+       unsigned long flags;
 
-       spin_lock(&engine->hw_lock);
+       spin_lock_irqsave(&engine->hw_lock, flags);
 
        /* Timer fired, first request is complete */
        request = first_request(engine);
                advance(request);
        }
 
-       spin_unlock(&engine->hw_lock);
+       spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
 static void mock_context_unpin(struct intel_context *ce)
        struct mock_request *mock = container_of(request, typeof(*mock), base);
        struct mock_engine *engine =
                container_of(request->engine, typeof(*engine), base);
+       unsigned long flags;
 
        i915_request_submit(request);
        GEM_BUG_ON(!request->global_seqno);
 
-       spin_lock_irq(&engine->hw_lock);
+       spin_lock_irqsave(&engine->hw_lock, flags);
        list_add_tail(&mock->link, &engine->hw_queue);
        if (mock->link.prev == &engine->hw_queue) {
                if (mock->delay)
                else
                        advance(mock);
        }
-       spin_unlock_irq(&engine->hw_lock);
+       spin_unlock_irqrestore(&engine->hw_lock, flags);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 
 void mock_engine_reset(struct intel_engine_cs *engine)
 {
-       intel_write_status_page(engine, I915_GEM_HWS_INDEX, 0);
+       intel_engine_write_global_seqno(engine, 0);
 }
 
 void mock_engine_free(struct intel_engine_cs *engine)
 
 void mock_engine_reset(struct intel_engine_cs *engine);
 void mock_engine_free(struct intel_engine_cs *engine);
 
-static inline void mock_seqno_advance(struct intel_engine_cs *engine, u32 seqno)
-{
-       intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
-       intel_engine_wakeup(engine);
-}
-
 #endif /* !__MOCK_ENGINE_H__ */