seq_puts(m, "Wedged\n");
        if (test_bit(I915_RESET_BACKOFF, &dev_priv->gpu_error.flags))
                seq_puts(m, "Reset in progress: struct_mutex backoff\n");
-       if (test_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags))
-               seq_puts(m, "Reset in progress: reset handoff to waiter\n");
        if (waitqueue_active(&dev_priv->gpu_error.wait_queue))
                seq_puts(m, "Waiter holding struct mutex\n");
        if (waitqueue_active(&dev_priv->gpu_error.reset_queue))
                struct rb_node *rb;
 
                seq_printf(m, "%s:\n", engine->name);
-               seq_printf(m, "\tseqno = %x [current %x, last %x]\n",
+               seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
                           engine->hangcheck.seqno, seqno[id],
-                          intel_engine_last_submit(engine));
-               seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n",
+                          intel_engine_last_submit(engine),
+                          jiffies_to_msecs(jiffies -
+                                           engine->hangcheck.action_timestamp));
+               seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
                           yesno(intel_engine_has_waiter(engine)),
                           yesno(test_bit(engine->id,
-                                         &dev_priv->gpu_error.missed_irq_rings)),
-                          yesno(engine->hangcheck.stalled),
-                          yesno(engine->hangcheck.wedged));
+                                         &dev_priv->gpu_error.missed_irq_rings)));
 
                spin_lock_irq(&b->rb_lock);
                for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
                seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
                           (long long)engine->hangcheck.acthd,
                           (long long)acthd[id]);
-               seq_printf(m, "\taction = %s(%d) %d ms ago\n",
-                          hangcheck_action_to_str(engine->hangcheck.action),
-                          engine->hangcheck.action,
-                          jiffies_to_msecs(jiffies -
-                                           engine->hangcheck.action_timestamp));
 
                if (engine->id == RCS) {
                        seq_puts(m, "\tinstdone read =\n");
 i915_wedged_set(void *data, u64 val)
 {
        struct drm_i915_private *i915 = data;
-       struct intel_engine_cs *engine;
-       unsigned int tmp;
 
        /*
         * There is no safeguard against this debugfs entry colliding
        if (i915_reset_backoff(&i915->gpu_error))
                return -EAGAIN;
 
-       for_each_engine_masked(engine, i915, val, tmp) {
-               engine->hangcheck.seqno = intel_engine_get_seqno(engine);
-               engine->hangcheck.stalled = true;
-       }
-
        i915_handle_error(i915, val, I915_ERROR_CAPTURE,
                          "Manually set wedged engine mask = %llx", val);
-
-       wait_on_bit(&i915->gpu_error.flags,
-                   I915_RESET_HANDOFF,
-                   TASK_UNINTERRUPTIBLE);
-
        return 0;
 }
 
                mutex_unlock(&i915->drm.struct_mutex);
        }
 
-       if (val & DROP_RESET_ACTIVE &&
-           i915_terminally_wedged(&i915->gpu_error)) {
+       if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
                i915_handle_error(i915, ALL_ENGINES, 0, NULL);
-               wait_on_bit(&i915->gpu_error.flags,
-                           I915_RESET_HANDOFF,
-                           TASK_UNINTERRUPTIBLE);
-       }
 
        fs_reclaim_acquire(GFP_KERNEL);
        if (val & DROP_BOUND)
 
        return unlikely(test_bit(I915_RESET_BACKOFF, &error->flags));
 }
 
-static inline bool i915_reset_handoff(struct i915_gpu_error *error)
-{
-       return unlikely(test_bit(I915_RESET_HANDOFF, &error->flags));
-}
-
 static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
 {
        return unlikely(test_bit(I915_WEDGED, &error->flags));
 
                     struct intel_rps_client *rps_client)
 {
        might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-       GEM_BUG_ON(debug_locks &&
-                  !!lockdep_is_held(&obj->base.dev->struct_mutex) !=
-                  !!(flags & I915_WAIT_LOCKED));
-#endif
        GEM_BUG_ON(timeout < 0);
 
        timeout = i915_gem_object_wait_reservation(obj->resv,
 
        GEM_TRACE("\n");
 
-       mutex_lock(&i915->drm.struct_mutex);
-
        wakeref = intel_runtime_pm_get(i915);
        intel_uncore_forcewake_get(i915, FORCEWAKE_ALL);
 
        intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
        intel_runtime_pm_put(i915, wakeref);
 
+       mutex_lock(&i915->drm.struct_mutex);
        i915_gem_contexts_lost(i915);
        mutex_unlock(&i915->drm.struct_mutex);
 }
        wakeref = intel_runtime_pm_get(i915);
        intel_suspend_gt_powersave(i915);
 
+       flush_workqueue(i915->wq);
+
        mutex_lock(&i915->drm.struct_mutex);
 
        /*
        i915_retire_requests(i915); /* ensure we flush after wedging */
 
        mutex_unlock(&i915->drm.struct_mutex);
+       i915_reset_flush(i915);
 
-       intel_uc_suspend(i915);
-
-       cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
-       cancel_delayed_work_sync(&i915->gt.retire_work);
+       drain_delayed_work(&i915->gt.retire_work);
 
        /*
         * As the idle_work is rearming if it detects a race, play safe and
         */
        drain_delayed_work(&i915->gt.idle_work);
 
+       intel_uc_suspend(i915);
+
        /*
         * Assert that we successfully flushed all the work and
         * reset the GPU back to its idle, low power state.
 
 #include <linux/pagevec.h>
 
 #include "i915_request.h"
+#include "i915_reset.h"
 #include "i915_selftest.h"
 #include "i915_timeline.h"
 
 
        err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
        err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
        err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
-       err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
-       err_printf(m, "  hangcheck action: %s\n",
-                  hangcheck_action_to_str(ee->hangcheck_action));
-       err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
+       err_printf(m, "  hangcheck timestamp: %dms (%lu%s)\n",
                   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
                   ee->hangcheck_timestamp,
                   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
                   jiffies_to_msecs(error->capture - error->epoch));
 
        for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-               if (error->engine[i].hangcheck_stalled &&
-                   error->engine[i].context.pid) {
-                       err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
-                                  engine_name(m->i915, i),
-                                  error->engine[i].context.comm,
-                                  error->engine[i].context.pid,
-                                  error->engine[i].context.ban_score,
-                                  bannable(&error->engine[i].context));
-               }
+               if (!error->engine[i].context.pid)
+                       continue;
+
+               err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
+                          engine_name(m->i915, i),
+                          error->engine[i].context.comm,
+                          error->engine[i].context.pid,
+                          error->engine[i].context.ban_score,
+                          bannable(&error->engine[i].context));
        }
        err_printf(m, "Reset count: %u\n", error->reset_count);
        err_printf(m, "Suspend count: %u\n", error->suspend_count);
        return i;
 }
 
-/* Generate a semi-unique error code. The code is not meant to have meaning, The
+/*
+ * Generate a semi-unique error code. The code is not meant to have meaning. The
  * code's only purpose is to try to prevent false duplicated bug reports by
  * grossly estimating a GPU error state.
  *
  *
  * It's only a small step better than a random number in its current form.
  */
-static u32 i915_error_generate_code(struct drm_i915_private *dev_priv,
-                                   struct i915_gpu_state *error,
-                                   int *engine_id)
+static u32 i915_error_generate_code(struct i915_gpu_state *error,
+                                   unsigned long engine_mask)
 {
-       u32 error_code = 0;
-       int i;
-
-       /* IPEHR would be an ideal way to detect errors, as it's the gross
+       /*
+        * IPEHR would be an ideal way to detect errors, as it's the gross
         * measure of "the command that hung." However, it has some very common
         * synchronization commands which almost always appear in the case of
         * strictly a client bug. Use instdone to differentiate some of those cases.
         */
-       for (i = 0; i < I915_NUM_ENGINES; i++) {
-               if (error->engine[i].hangcheck_stalled) {
-                       if (engine_id)
-                               *engine_id = i;
+       if (engine_mask) {
+               struct drm_i915_error_engine *ee =
+                       &error->engine[ffs(engine_mask) - 1];
 
-                       return error->engine[i].ipehr ^
-                              error->engine[i].instdone.instdone;
-               }
+               return ee->ipehr ^ ee->instdone.instdone;
        }
 
-       return error_code;
+       return 0;
 }
 
 static void gem_record_fences(struct i915_gpu_state *error)
        }
 
        ee->idle = intel_engine_is_idle(engine);
-       ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
-       ee->hangcheck_action = engine->hangcheck.action;
-       ee->hangcheck_stalled = engine->hangcheck.stalled;
+       if (!ee->idle)
+               ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
        ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
                                                  engine);
 
        error->pgtbl_er = I915_READ(PGTBL_ER);
 }
 
-static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
-                                  struct i915_gpu_state *error,
-                                  u32 engine_mask,
-                                  const char *error_msg)
+static const char *
+error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg)
 {
-       u32 ecode;
-       int engine_id = -1, len;
+       int len;
+       int i;
 
-       ecode = i915_error_generate_code(dev_priv, error, &engine_id);
+       for (i = 0; i < ARRAY_SIZE(error->engine); i++)
+               if (!error->engine[i].context.pid)
+                       engines &= ~BIT(i);
 
        len = scnprintf(error->error_msg, sizeof(error->error_msg),
-                       "GPU HANG: ecode %d:%d:0x%08x",
-                       INTEL_GEN(dev_priv), engine_id, ecode);
-
-       if (engine_id != -1 && error->engine[engine_id].context.pid)
+                       "GPU HANG: ecode %d:%lx:0x%08x",
+                       INTEL_GEN(error->i915), engines,
+                       i915_error_generate_code(error, engines));
+       if (engines) {
+               /* Just show the first executing process, more is confusing */
+               i = ffs(engines) - 1;
                len += scnprintf(error->error_msg + len,
                                 sizeof(error->error_msg) - len,
                                 ", in %s [%d]",
-                                error->engine[engine_id].context.comm,
-                                error->engine[engine_id].context.pid);
+                                error->engine[i].context.comm,
+                                error->engine[i].context.pid);
+       }
+       if (msg)
+               len += scnprintf(error->error_msg + len,
+                                sizeof(error->error_msg) - len,
+                                ", %s", msg);
 
-       scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
-                 ", reason: %s, action: %s",
-                 error_msg,
-                 engine_mask ? "reset" : "continue");
+       return error->error_msg;
 }
 
 static void capture_gen_state(struct i915_gpu_state *error)
        for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
                const struct drm_i915_error_engine *ee = &error->engine[i];
 
-               if (ee->hangcheck_stalled &&
+               if (ee->hangcheck_timestamp &&
                    time_before(ee->hangcheck_timestamp, epoch))
                        epoch = ee->hangcheck_timestamp;
        }
  * i915_capture_error_state - capture an error record for later analysis
  * @i915: i915 device
  * @engine_mask: the mask of engines triggering the hang
- * @error_msg: a message to insert into the error capture header
+ * @msg: a message to insert into the error capture header
  *
  * Should be called when an error is detected (either a hang or an error
  * interrupt) to capture error state from the time of the error.  Fills
  * to pick up.
  */
 void i915_capture_error_state(struct drm_i915_private *i915,
-                             u32 engine_mask,
-                             const char *error_msg)
+                             unsigned long engine_mask,
+                             const char *msg)
 {
        static bool warned;
        struct i915_gpu_state *error;
        if (IS_ERR(error))
                return;
 
-       i915_error_capture_msg(i915, error, engine_mask, error_msg);
-       DRM_INFO("%s\n", error->error_msg);
+       dev_info(i915->drm.dev, "%s\n", error_msg(error, engine_mask, msg));
 
        if (!error->simulated) {
                spin_lock_irqsave(&i915->gpu_error.lock, flags);
 
                bool waiting;
                int num_waiters;
                unsigned long hangcheck_timestamp;
-               bool hangcheck_stalled;
-               enum intel_engine_hangcheck_action hangcheck_action;
                struct i915_address_space *vm;
                int num_requests;
                u32 reset_count;
        struct scatterlist *sgl, *fit;
 };
 
+struct i915_gpu_restart;
+
 struct i915_gpu_error {
        /* For hangcheck timer */
 #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */
         * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a
         * secondary role in preventing two concurrent global reset attempts.
         *
-        * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the
-        * struct_mutex. We try to acquire the struct_mutex in the reset worker,
-        * but it may be held by some long running waiter (that we cannot
-        * interrupt without causing trouble). Once we are ready to do the GPU
-        * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If
-        * they already hold the struct_mutex and want to participate they can
-        * inspect the bit and do the reset directly, otherwise the worker
-        * waits for the struct_mutex.
-        *
         * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to
         * acquire the struct_mutex to reset an engine, we need an explicit
         * flag to prevent two concurrent reset attempts in the same engine.
         */
        unsigned long flags;
 #define I915_RESET_BACKOFF     0
-#define I915_RESET_HANDOFF     1
-#define I915_RESET_MODESET     2
-#define I915_RESET_ENGINE      3
+#define I915_RESET_MODESET     1
+#define I915_RESET_ENGINE      2
 #define I915_WEDGED            (BITS_PER_LONG - 1)
 
        /** Number of times an engine has been reset */
        u32 reset_engine_count[I915_NUM_ENGINES];
 
-       /** Set of stalled engines with guilty requests, in the current reset */
-       u32 stalled_mask;
-
-       /** Reason for the current *global* reset */
-       const char *reason;
-
        struct mutex wedge_mutex; /* serialises wedging/unwedging */
 
        /**
 
        /* For missed irq/seqno simulation. */
        unsigned long test_irq_rings;
+
+       struct i915_gpu_restart *restart;
 };
 
 struct drm_i915_error_state_buf {
 
 struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915);
 void i915_capture_error_state(struct drm_i915_private *dev_priv,
-                             u32 engine_mask,
+                             unsigned long engine_mask,
                              const char *error_msg);
 
 static inline struct i915_gpu_state *
 
        return false;
 }
 
-static bool __i915_wait_request_check_and_reset(struct i915_request *request)
-{
-       struct i915_gpu_error *error = &request->i915->gpu_error;
-
-       if (likely(!i915_reset_handoff(error)))
-               return false;
-
-       __set_current_state(TASK_RUNNING);
-       i915_reset(request->i915, error->stalled_mask, error->reason);
-       return true;
-}
-
 /**
  * i915_request_wait - wait until execution of request has finished
  * @rq: the request to wait upon
 {
        const int state = flags & I915_WAIT_INTERRUPTIBLE ?
                TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
-       wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
-       DEFINE_WAIT_FUNC(reset, default_wake_function);
        DEFINE_WAIT_FUNC(exec, default_wake_function);
        struct intel_wait wait;
 
        might_sleep();
-#if IS_ENABLED(CONFIG_LOCKDEP)
-       GEM_BUG_ON(debug_locks &&
-                  !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
-                  !!(flags & I915_WAIT_LOCKED));
-#endif
        GEM_BUG_ON(timeout < 0);
 
        if (i915_request_completed(rq))
                return -ETIME;
 
        trace_i915_request_wait_begin(rq, flags);
-
        add_wait_queue(&rq->execute, &exec);
-       if (flags & I915_WAIT_LOCKED)
-               add_wait_queue(errq, &reset);
-
        intel_wait_init(&wait);
        if (flags & I915_WAIT_PRIORITY)
                i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
                if (intel_wait_update_request(&wait, rq))
                        break;
 
-               if (flags & I915_WAIT_LOCKED &&
-                   __i915_wait_request_check_and_reset(rq))
-                       continue;
-
                if (signal_pending_state(state, current)) {
                        timeout = -ERESTARTSYS;
                        goto complete;
                 */
                goto wakeup;
 
-       if (flags & I915_WAIT_LOCKED)
-               __i915_wait_request_check_and_reset(rq);
-
        for (;;) {
                if (signal_pending_state(state, current)) {
                        timeout = -ERESTARTSYS;
                if (i915_request_completed(rq))
                        break;
 
-               /*
-                * If the GPU is hung, and we hold the lock, reset the GPU
-                * and then check for completion. On a full reset, the engine's
-                * HW seqno will be advanced passed us and we are complete.
-                * If we do a partial reset, we have to wait for the GPU to
-                * resume and update the breadcrumb.
-                *
-                * If we don't hold the mutex, we can just wait for the worker
-                * to come along and update the breadcrumb (either directly
-                * itself, or indirectly by recovering the GPU).
-                */
-               if (flags & I915_WAIT_LOCKED &&
-                   __i915_wait_request_check_and_reset(rq))
-                       continue;
-
                /* Only spin if we know the GPU is processing this request */
                if (__i915_spin_request(rq, wait.seqno, state, 2))
                        break;
        intel_engine_remove_wait(rq->engine, &wait);
 complete:
        __set_current_state(TASK_RUNNING);
-       if (flags & I915_WAIT_LOCKED)
-               remove_wait_queue(errq, &reset);
        remove_wait_queue(&rq->execute, &exec);
        trace_i915_request_wait_end(rq);
 
 
  */
 
 #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 
 #define RESET_MAX_RETRIES 3
 
+/* XXX How to handle concurrent GGTT updates using tiling registers? */
+#define RESET_UNDER_STOP_MACHINE 0
+
 static void engine_skip_context(struct i915_request *rq)
 {
        struct intel_engine_cs *engine = rq->engine;
        struct i915_gem_context *hung_ctx = rq->gem_context;
        struct i915_timeline *timeline = rq->timeline;
-       unsigned long flags;
 
+       lockdep_assert_held(&engine->timeline.lock);
        GEM_BUG_ON(timeline == &engine->timeline);
 
-       spin_lock_irqsave(&engine->timeline.lock, flags);
        spin_lock(&timeline->lock);
 
-       list_for_each_entry_continue(rq, &engine->timeline.requests, link)
-               if (rq->gem_context == hung_ctx)
-                       i915_request_skip(rq, -EIO);
+       if (rq->global_seqno) {
+               list_for_each_entry_continue(rq,
+                                            &engine->timeline.requests, link)
+                       if (rq->gem_context == hung_ctx)
+                               i915_request_skip(rq, -EIO);
+       }
 
        list_for_each_entry(rq, &timeline->requests, link)
                i915_request_skip(rq, -EIO);
 
        spin_unlock(&timeline->lock);
-       spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void client_mark_guilty(struct drm_i915_file_private *file_priv,
        }
 }
 
-static void context_mark_guilty(struct i915_gem_context *ctx)
+static bool context_mark_guilty(struct i915_gem_context *ctx)
 {
        unsigned int score;
        bool banned, bannable;
 
        /* Cool contexts don't accumulate client ban score */
        if (!bannable)
-               return;
+               return false;
 
        if (banned) {
                DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, banned\n",
 
        if (!IS_ERR_OR_NULL(ctx->file_priv))
                client_mark_guilty(ctx->file_priv, ctx);
+
+       return banned;
 }
 
 static void context_mark_innocent(struct i915_gem_context *ctx)
        atomic_inc(&ctx->active_count);
 }
 
+void i915_reset_request(struct i915_request *rq, bool guilty)
+{
+       lockdep_assert_held(&rq->engine->timeline.lock);
+       GEM_BUG_ON(i915_request_completed(rq));
+
+       if (guilty) {
+               i915_request_skip(rq, -EIO);
+               if (context_mark_guilty(rq->gem_context))
+                       engine_skip_context(rq);
+       } else {
+               dma_fence_set_error(&rq->fence, -EAGAIN);
+               context_mark_innocent(rq->gem_context);
+       }
+}
+
 static void gen3_stop_engine(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
  * Ensure irq handler finishes, and not run again.
  * Also return the active request so that we only search for it once.
  */
-static struct i915_request *
-reset_prepare_engine(struct intel_engine_cs *engine)
+static void reset_prepare_engine(struct intel_engine_cs *engine)
 {
-       struct i915_request *rq;
-
        /*
         * During the reset sequence, we must prevent the engine from
         * entering RC6. As the context state is undefined until we restart
         * GPU state upon resume, i.e. fail to restart after a reset.
         */
        intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL);
-
-       rq = engine->reset.prepare(engine);
-       if (rq && rq->fence.error == -EIO)
-               rq = ERR_PTR(-EIO); /* Previous reset failed! */
-
-       return rq;
+       engine->reset.prepare(engine);
 }
 
-static int reset_prepare(struct drm_i915_private *i915)
+static void reset_prepare(struct drm_i915_private *i915)
 {
        struct intel_engine_cs *engine;
-       struct i915_request *rq;
        enum intel_engine_id id;
-       int err = 0;
 
-       for_each_engine(engine, i915, id) {
-               rq = reset_prepare_engine(engine);
-               if (IS_ERR(rq)) {
-                       err = PTR_ERR(rq);
-                       continue;
-               }
-
-               engine->hangcheck.active_request = rq;
-       }
+       for_each_engine(engine, i915, id)
+               reset_prepare_engine(engine);
 
-       i915_gem_revoke_fences(i915);
        intel_uc_sanitize(i915);
-
-       return err;
 }
 
-/* Returns the request if it was guilty of the hang */
-static struct i915_request *
-reset_request(struct intel_engine_cs *engine,
-             struct i915_request *rq,
-             bool stalled)
+static int gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
 {
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       int err;
+
        /*
-        * The guilty request will get skipped on a hung engine.
-        *
-        * Users of client default contexts do not rely on logical
-        * state preserved between batches so it is safe to execute
-        * queued requests following the hang. Non default contexts
-        * rely on preserved state, so skipping a batch loses the
-        * evolution of the state and it needs to be considered corrupted.
-        * Executing more queued batches on top of corrupted state is
-        * risky. But we take the risk by trying to advance through
-        * the queued requests in order to make the client behaviour
-        * more predictable around resets, by not throwing away random
-        * amount of batches it has prepared for execution. Sophisticated
-        * clients can use gem_reset_stats_ioctl and dma fence status
-        * (exported via sync_file info ioctl on explicit fences) to observe
-        * when it loses the context state and should rebuild accordingly.
-        *
-        * The context ban, and ultimately the client ban, mechanism are safety
-        * valves if client submission ends up resulting in nothing more than
-        * subsequent hangs.
+        * Everything depends on having the GTT running, so we need to start
+        * there.
         */
+       err = i915_ggtt_enable_hw(i915);
+       if (err)
+               return err;
 
-       if (i915_request_completed(rq)) {
-               GEM_TRACE("%s pardoned global=%d (fence %llx:%lld), current %d\n",
-                         engine->name, rq->global_seqno,
-                         rq->fence.context, rq->fence.seqno,
-                         intel_engine_get_seqno(engine));
-               stalled = false;
-       }
-
-       if (stalled) {
-               context_mark_guilty(rq->gem_context);
-               i915_request_skip(rq, -EIO);
+       for_each_engine(engine, i915, id)
+               intel_engine_reset(engine, stalled_mask & ENGINE_MASK(id));
 
-               /* If this context is now banned, skip all pending requests. */
-               if (i915_gem_context_is_banned(rq->gem_context))
-                       engine_skip_context(rq);
-       } else {
-               /*
-                * Since this is not the hung engine, it may have advanced
-                * since the hang declaration. Double check by refinding
-                * the active request at the time of the reset.
-                */
-               rq = i915_gem_find_active_request(engine);
-               if (rq) {
-                       unsigned long flags;
-
-                       context_mark_innocent(rq->gem_context);
-                       dma_fence_set_error(&rq->fence, -EAGAIN);
-
-                       /* Rewind the engine to replay the incomplete rq */
-                       spin_lock_irqsave(&engine->timeline.lock, flags);
-                       rq = list_prev_entry(rq, link);
-                       if (&rq->link == &engine->timeline.requests)
-                               rq = NULL;
-                       spin_unlock_irqrestore(&engine->timeline.lock, flags);
-               }
-       }
+       i915_gem_restore_fences(i915);
 
-       return rq;
+       return err;
 }
 
-static void reset_engine(struct intel_engine_cs *engine,
-                        struct i915_request *rq,
-                        bool stalled)
+static void reset_finish_engine(struct intel_engine_cs *engine)
 {
-       if (rq)
-               rq = reset_request(engine, rq, stalled);
-
-       /* Setup the CS to resume from the breadcrumb of the hung request */
-       engine->reset.reset(engine, rq);
+       engine->reset.finish(engine);
+       intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
 }
 
-static void gt_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
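+/* Deferred work used to reload a kernel context on idle engines after a reset */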
+struct i915_gpu_restart {
+       struct work_struct work;
+       struct drm_i915_private *i915;
+};
+
+static void restart_work(struct work_struct *work)
 {
+       struct i915_gpu_restart *arg = container_of(work, typeof(*arg), work);
+       struct drm_i915_private *i915 = arg->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
+       intel_wakeref_t wakeref;
 
-       lockdep_assert_held(&i915->drm.struct_mutex);
-
-       i915_retire_requests(i915);
+       wakeref = intel_runtime_pm_get(i915);
+       mutex_lock(&i915->drm.struct_mutex);
+       WRITE_ONCE(i915->gpu_error.restart, NULL);
 
        for_each_engine(engine, i915, id) {
-               struct intel_context *ce;
-
-               reset_engine(engine,
-                            engine->hangcheck.active_request,
-                            stalled_mask & ENGINE_MASK(id));
-               ce = fetch_and_zero(&engine->last_retired_context);
-               if (ce)
-                       intel_context_unpin(ce);
+               struct i915_request *rq;
 
                /*
                 * Ostensibly, we always want a context loaded for powersaving,
                 * so if the engine is idle after the reset, send a request
                 * to load our scratch kernel_context.
-                *
-                * More mysteriously, if we leave the engine idle after a reset,
-                * the next userspace batch may hang, with what appears to be
-                * an incoherent read by the CS (presumably stale TLB). An
-                * empty request appears sufficient to paper over the glitch.
                 */
-               if (intel_engine_is_idle(engine)) {
-                       struct i915_request *rq;
+               if (!intel_engine_is_idle(engine))
+                       continue;
 
-                       rq = i915_request_alloc(engine, i915->kernel_context);
-                       if (!IS_ERR(rq))
-                               i915_request_add(rq);
-               }
+               rq = i915_request_alloc(engine, i915->kernel_context);
+               if (!IS_ERR(rq))
+                       i915_request_add(rq);
        }
 
-       i915_gem_restore_fences(i915);
-}
-
-static void reset_finish_engine(struct intel_engine_cs *engine)
-{
-       engine->reset.finish(engine);
+       mutex_unlock(&i915->drm.struct_mutex);
+       intel_runtime_pm_put(i915, wakeref);
 
-       intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL);
+       kfree(arg);
 }
 
 static void reset_finish(struct drm_i915_private *i915)
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
-       lockdep_assert_held(&i915->drm.struct_mutex);
-
-       for_each_engine(engine, i915, id) {
-               engine->hangcheck.active_request = NULL;
+       for_each_engine(engine, i915, id)
                reset_finish_engine(engine);
+}
+
+static void reset_restart(struct drm_i915_private *i915)
+{
+       struct i915_gpu_restart *arg;
+
+       /*
+        * Following the reset, ensure that we always reload context for
+        * powersaving, and to correct engine->last_retired_context. Since
+        * this requires us to submit a request, queue a worker to do that
+        * task for us to evade any locking here.
+        */
+       if (READ_ONCE(i915->gpu_error.restart))
+               return;
+
+       arg = kmalloc(sizeof(*arg), GFP_KERNEL);
+       if (arg) {
+               arg->i915 = i915;
+               INIT_WORK(&arg->work, restart_work);
+
+               WRITE_ONCE(i915->gpu_error.restart, arg);
+               queue_work(i915->wq, &arg->work);
        }
 }
 
        struct i915_timeline *tl;
        bool ret = false;
 
-       lockdep_assert_held(&i915->drm.struct_mutex);
-
        if (!test_bit(I915_WEDGED, &error->flags))
                return true;
 
         */
        list_for_each_entry(tl, &i915->gt.timelines, link) {
                struct i915_request *rq;
+               long timeout;
 
-               rq = i915_gem_active_peek(&tl->last_request,
-                                         &i915->drm.struct_mutex);
+               rq = i915_gem_active_get_unlocked(&tl->last_request);
                if (!rq)
                        continue;
 
                 * and when the seqno passes the fence, the signaler
                 * then signals the fence waking us up).
                 */
-               if (dma_fence_default_wait(&rq->fence, true,
-                                          MAX_SCHEDULE_TIMEOUT) < 0)
+               timeout = dma_fence_default_wait(&rq->fence, true,
+                                                MAX_SCHEDULE_TIMEOUT);
+               i915_request_put(rq);
+               if (timeout < 0)
                        goto unlock;
        }
-       i915_retire_requests(i915);
-       GEM_BUG_ON(i915->gt.active_requests);
 
        intel_engines_sanitize(i915, false);
 
         * context and do not require stop_machine().
         */
        intel_engines_reset_default_submission(i915);
-       i915_gem_contexts_lost(i915);
 
        GEM_TRACE("end\n");
 
        return ret;
 }
 
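+/* Arguments for __i915_reset__BKL(), which may be run under stop_machine() */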
+struct __i915_reset {
+       struct drm_i915_private *i915;
+       unsigned int stalled_mask;
+};
+
+static int __i915_reset__BKL(void *data)
+{
+       struct __i915_reset *arg = data;
+       int err;
+
+       err = intel_gpu_reset(arg->i915, ALL_ENGINES);
+       if (err)
+               return err;
+
+       return gt_reset(arg->i915, arg->stalled_mask);
+}
+
+#if RESET_UNDER_STOP_MACHINE
+/*
+ * XXX An alternative to using stop_machine would be to park only the
+ * processes that have a GGTT mmap. By remote parking the threads (SIGSTOP)
+ * we should be able to prevent their memory accesses via the lost fence
+ * registers over the course of the reset without the potential recursion
+ * of mutexes between the pagefault handler and reset.
+ *
+ * See igt/gem_mmap_gtt/hang
+ */
+#define __do_reset(fn, arg) stop_machine(fn, arg, NULL)
+#else
+#define __do_reset(fn, arg) fn(arg)
+#endif
+
+static int do_reset(struct drm_i915_private *i915, unsigned int stalled_mask)
+{
+       struct __i915_reset arg = { i915, stalled_mask };
+       int err, i;
+
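+       /* Retry the reset a few times, pausing briefly between attempts */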
+       err = __do_reset(__i915_reset__BKL, &arg);
+       for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
+               msleep(100);
+               err = __do_reset(__i915_reset__BKL, &arg);
+       }
+
+       return err;
+}
+
 /**
  * i915_reset - reset chip after a hang
  * @i915: #drm_i915_private to reset
 {
        struct i915_gpu_error *error = &i915->gpu_error;
        int ret;
-       int i;
 
        GEM_TRACE("flags=%lx\n", error->flags);
 
        might_sleep();
-       lockdep_assert_held(&i915->drm.struct_mutex);
        assert_rpm_wakelock_held(i915);
        GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags));
 
-       if (!test_bit(I915_RESET_HANDOFF, &error->flags))
-               return;
-
        /* Clear any previous failed attempts at recovery. Time to try again. */
        if (!i915_gem_unset_wedged(i915))
-               goto wakeup;
+               return;
 
        if (reason)
                dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason);
        error->reset_count++;
 
-       ret = reset_prepare(i915);
-       if (ret) {
-               dev_err(i915->drm.dev, "GPU recovery failed\n");
-               goto taint;
-       }
+       reset_prepare(i915);
 
        if (!intel_has_gpu_reset(i915)) {
                if (i915_modparams.reset)
                goto error;
        }
 
-       for (i = 0; i < RESET_MAX_RETRIES; i++) {
-               ret = intel_gpu_reset(i915, ALL_ENGINES);
-               if (ret == 0)
-                       break;
-
-               msleep(100);
-       }
-       if (ret) {
+       if (do_reset(i915, stalled_mask)) {
                dev_err(i915->drm.dev, "Failed to reset chip\n");
                goto taint;
        }
 
-       /* Ok, now get things going again... */
-
-       /*
-        * Everything depends on having the GTT running, so we need to start
-        * there.
-        */
-       ret = i915_ggtt_enable_hw(i915);
-       if (ret) {
-               DRM_ERROR("Failed to re-enable GGTT following reset (%d)\n",
-                         ret);
-               goto error;
-       }
-
-       gt_reset(i915, stalled_mask);
        intel_overlay_reset(i915);
 
        /*
 
 finish:
        reset_finish(i915);
-wakeup:
-       clear_bit(I915_RESET_HANDOFF, &error->flags);
-       wake_up_bit(&error->flags, I915_RESET_HANDOFF);
+       if (!i915_terminally_wedged(error))
+               reset_restart(i915);
        return;
 
 taint:
        add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 error:
        i915_gem_set_wedged(i915);
-       i915_retire_requests(i915);
        goto finish;
 }
 
 int i915_reset_engine(struct intel_engine_cs *engine, const char *msg)
 {
        struct i915_gpu_error *error = &engine->i915->gpu_error;
-       struct i915_request *active_request;
        int ret;
 
        GEM_TRACE("%s flags=%lx\n", engine->name, error->flags);
        GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags));
 
-       active_request = reset_prepare_engine(engine);
-       if (IS_ERR_OR_NULL(active_request)) {
-               /* Either the previous reset failed, or we pardon the reset. */
-               ret = PTR_ERR(active_request);
-               goto out;
-       }
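+       /* The engine has already advanced past its last submission; pardon it */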
+       if (i915_seqno_passed(intel_engine_get_seqno(engine),
+                             intel_engine_last_submit(engine)))
+               return 0;
+
+       reset_prepare_engine(engine);
 
        if (msg)
                dev_notice(engine->i915->drm.dev,
         * active request and can drop it, adjust head to skip the offending
         * request to resume executing remaining requests in the queue.
         */
-       reset_engine(engine, active_request, true);
+       intel_engine_reset(engine, true);
 
        /*
         * The engine and its registers (and workarounds in case of render)
        i915_wedge_on_timeout(&w, i915, 5 * HZ) {
                intel_prepare_reset(i915);
 
-               error->reason = reason;
-               error->stalled_mask = engine_mask;
-
-               /* Signal that locked waiters should reset the GPU */
-               smp_mb__before_atomic();
-               set_bit(I915_RESET_HANDOFF, &error->flags);
-               wake_up_all(&error->wait_queue);
-
-               /*
-                * Wait for anyone holding the lock to wakeup, without
-                * blocking indefinitely on struct_mutex.
-                */
-               do {
-                       if (mutex_trylock(&i915->drm.struct_mutex)) {
-                               i915_reset(i915, engine_mask, reason);
-                               mutex_unlock(&i915->drm.struct_mutex);
-                       }
-               } while (wait_on_bit_timeout(&error->flags,
-                                            I915_RESET_HANDOFF,
-                                            TASK_UNINTERRUPTIBLE,
-                                            1));
-
-               error->stalled_mask = 0;
-               error->reason = NULL;
+               i915_reset(i915, engine_mask, reason);
 
                intel_finish_reset(i915);
        }
        intel_runtime_pm_put(i915, wakeref);
 }
 
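+/* Flush any pending reset/restart work and wait for the GPU to become idle */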
+bool i915_reset_flush(struct drm_i915_private *i915)
+{
+       int err;
+
+       cancel_delayed_work_sync(&i915->gpu_error.hangcheck_work);
+
+       flush_workqueue(i915->wq);
+       GEM_BUG_ON(READ_ONCE(i915->gpu_error.restart));
+
+       mutex_lock(&i915->drm.struct_mutex);
+       err = i915_gem_wait_for_idle(i915,
+                                    I915_WAIT_LOCKED |
+                                    I915_WAIT_FOR_IDLE_BOOST,
+                                    MAX_SCHEDULE_TIMEOUT);
+       mutex_unlock(&i915->drm.struct_mutex);
+
+       return !err;
+}
+
 static void i915_wedge_me(struct work_struct *work)
 {
        struct i915_wedge_me *w = container_of(work, typeof(*w), work.work);
 
 int i915_reset_engine(struct intel_engine_cs *engine,
                      const char *reason);
 
+void i915_reset_request(struct i915_request *rq, bool guilty);
+bool i915_reset_flush(struct drm_i915_private *i915);
+
 bool intel_has_gpu_reset(struct drm_i915_private *i915);
 bool intel_has_reset_engine(struct drm_i915_private *i915);
 
 
        if (!reset_engines(i915) && !force)
                return;
 
-       for_each_engine(engine, i915, id) {
-               if (engine->reset.reset)
-                       engine->reset.reset(engine, NULL);
-       }
+       for_each_engine(engine, i915, id)
+               intel_engine_reset(engine, false);
 }
 
 /**
 
        spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
-static struct i915_request *
-guc_reset_prepare(struct intel_engine_cs *engine)
+static void guc_reset_prepare(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
 
         */
        if (engine->i915->guc.preempt_wq)
                flush_workqueue(engine->i915->guc.preempt_wq);
-
-       return i915_gem_find_active_request(engine);
 }
 
 /*
 
 #include "i915_drv.h"
 #include "i915_reset.h"
 
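+/* Per-engine state sampled by the hangcheck worker on each iteration */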
+struct hangcheck {
+       u64 acthd;
+       u32 seqno;
+       enum intel_engine_hangcheck_action action;
+       unsigned long action_timestamp;
+       int deadlock;
+       struct intel_instdone instdone;
+       bool wedged:1;
+       bool stalled:1;
+};
+
 static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
 {
        u32 tmp = current_instdone | *old_instdone;
 }
 
 static void hangcheck_load_sample(struct intel_engine_cs *engine,
-                                 struct intel_engine_hangcheck *hc)
+                                 struct hangcheck *hc)
 {
        hc->acthd = intel_engine_get_active_head(engine);
        hc->seqno = intel_engine_get_seqno(engine);
 }
 
 static void hangcheck_store_sample(struct intel_engine_cs *engine,
-                                  const struct intel_engine_hangcheck *hc)
+                                  const struct hangcheck *hc)
 {
        engine->hangcheck.acthd = hc->acthd;
        engine->hangcheck.seqno = hc->seqno;
-       engine->hangcheck.action = hc->action;
-       engine->hangcheck.stalled = hc->stalled;
-       engine->hangcheck.wedged = hc->wedged;
 }
 
 static enum intel_engine_hangcheck_action
 hangcheck_get_action(struct intel_engine_cs *engine,
-                    const struct intel_engine_hangcheck *hc)
+                    const struct hangcheck *hc)
 {
        if (engine->hangcheck.seqno != hc->seqno)
                return ENGINE_ACTIVE_SEQNO;
 }
 
 static void hangcheck_accumulate_sample(struct intel_engine_cs *engine,
-                                       struct intel_engine_hangcheck *hc)
+                                       struct hangcheck *hc)
 {
        unsigned long timeout = I915_ENGINE_DEAD_TIMEOUT;
 
        intel_uncore_arm_unclaimed_mmio_detection(dev_priv);
 
        for_each_engine(engine, dev_priv, id) {
-               struct intel_engine_hangcheck hc;
+               struct hangcheck hc;
 
                hangcheck_load_sample(engine, &hc);
                hangcheck_accumulate_sample(engine, &hc);
                hangcheck_store_sample(engine, &hc);
 
-               if (engine->hangcheck.stalled) {
+               if (hc.stalled) {
                        hung |= intel_engine_flag(engine);
                        if (hc.action != ENGINE_DEAD)
                                stuck |= intel_engine_flag(engine);
                }
 
-               if (engine->hangcheck.wedged)
+               if (hc.wedged)
                        wedged |= intel_engine_flag(engine);
        }
 
 
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
 #include "intel_mocs.h"
        assert_ring_tail_valid(rq->ring, rq->tail);
 }
 
-static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
+static struct i915_request *
+__unwind_incomplete_requests(struct intel_engine_cs *engine)
 {
        struct i915_request *rq, *rn, *active = NULL;
        struct list_head *uninitialized_var(pl);
                list_move_tail(&active->sched.link,
                               i915_sched_lookup_priolist(engine, prio));
        }
+
+       return active;
 }
 
 void
        return 0;
 }
 
-static struct i915_request *
-execlists_reset_prepare(struct intel_engine_cs *engine)
+static void execlists_reset_prepare(struct intel_engine_cs *engine)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct i915_request *request, *active;
        unsigned long flags;
 
        GEM_TRACE("%s: depth<-%d\n", engine->name,
         * prevents the race.
         */
        __tasklet_disable_sync_once(&execlists->tasklet);
+       GEM_BUG_ON(!reset_in_progress(execlists));
 
+       /* And flush any current direct submission. */
        spin_lock_irqsave(&engine->timeline.lock, flags);
-
-       /*
-        * We want to flush the pending context switches, having disabled
-        * the tasklet above, we can assume exclusive access to the execlists.
-        * For this allows us to catch up with an inflight preemption event,
-        * and avoid blaming an innocent request if the stall was due to the
-        * preemption itself.
-        */
-       process_csb(engine);
-
-       /*
-        * The last active request can then be no later than the last request
-        * now in ELSP[0]. So search backwards from there, so that if the GPU
-        * has advanced beyond the last CSB update, it will be pardoned.
-        */
-       active = NULL;
-       request = port_request(execlists->port);
-       if (request) {
-               /*
-                * Prevent the breadcrumb from advancing before we decide
-                * which request is currently active.
-                */
-               intel_engine_stop_cs(engine);
-
-               list_for_each_entry_from_reverse(request,
-                                                &engine->timeline.requests,
-                                                link) {
-                       if (__i915_request_completed(request,
-                                                    request->global_seqno))
-                               break;
-
-                       active = request;
-               }
-       }
-
+       process_csb(engine); /* drain preemption events */
        spin_unlock_irqrestore(&engine->timeline.lock, flags);
-
-       return active;
 }
 
-static void execlists_reset(struct intel_engine_cs *engine,
-                           struct i915_request *request)
+static void execlists_reset(struct intel_engine_cs *engine, bool stalled)
 {
        struct intel_engine_execlists * const execlists = &engine->execlists;
+       struct i915_request *rq;
        unsigned long flags;
        u32 *regs;
 
-       GEM_TRACE("%s request global=%d, current=%d\n",
-                 engine->name, request ? request->global_seqno : 0,
-                 intel_engine_get_seqno(engine));
-
        spin_lock_irqsave(&engine->timeline.lock, flags);
 
        /*
        execlists_cancel_port_requests(execlists);
 
        /* Push back any incomplete requests for replay after the reset. */
-       __unwind_incomplete_requests(engine);
+       rq = __unwind_incomplete_requests(engine);
 
        /* Following the reset, we need to reload the CSB read/write pointers */
        reset_csb_pointers(&engine->execlists);
 
-       spin_unlock_irqrestore(&engine->timeline.lock, flags);
+       GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+                 engine->name,
+                 rq ? rq->global_seqno : 0,
+                 intel_engine_get_seqno(engine),
+                 yesno(stalled));
+       if (!rq)
+               goto out_unlock;
 
        /*
         * If the request was innocent, we leave the request in the ELSP
         * and have to at least restore the RING register in the context
         * image back to the expected values to skip over the guilty request.
         */
-       if (!request || request->fence.error != -EIO)
-               return;
+       i915_reset_request(rq, stalled);
+       if (!stalled)
+               goto out_unlock;
 
        /*
         * We want a simple context + ring to execute the breadcrumb update.
         * future request will be after userspace has had the opportunity
         * to recreate its own state.
         */
-       regs = request->hw_context->lrc_reg_state;
+       regs = rq->hw_context->lrc_reg_state;
        if (engine->pinned_default_state) {
                memcpy(regs, /* skip restoring the vanilla PPHWSP */
                       engine->pinned_default_state + LRC_STATE_PN * PAGE_SIZE,
        }
 
        /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */
-       request->ring->head = intel_ring_wrap(request->ring, request->postfix);
+       rq->ring->head = intel_ring_wrap(rq->ring, rq->postfix);
+       intel_ring_update_space(rq->ring);
 
-       execlists_init_reg_state(regs, request->gem_context, engine,
-                                request->ring);
+       execlists_init_reg_state(regs, rq->gem_context, engine, rq->ring);
+       __execlists_update_reg_state(engine, rq->hw_context);
 
-       __execlists_update_reg_state(engine, request->hw_context);
-
-       intel_ring_update_space(request->ring);
-
-       /* Reset WaIdleLiteRestore:bdw,skl as well */
-       unwind_wa_tail(request);
+out_unlock:
+       spin_unlock_irqrestore(&engine->timeline.lock, flags);
 }
 
 static void execlists_reset_finish(struct intel_engine_cs *engine)
         * to sleep before we restart and reload a context.
         *
         */
+       GEM_BUG_ON(!reset_in_progress(execlists));
        if (!RB_EMPTY_ROOT(&execlists->queue.rb_root))
                execlists->tasklet.func(execlists->tasklet.data);
 
 
        if (!overlay)
                return;
 
-       intel_overlay_release_old_vid(overlay);
-
        overlay->old_xscale = 0;
        overlay->old_yscale = 0;
        overlay->crtc = NULL;
 
 
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
+#include "i915_reset.h"
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include "intel_workarounds.h"
        return ret;
 }
 
-static struct i915_request *reset_prepare(struct intel_engine_cs *engine)
+static void reset_prepare(struct intel_engine_cs *engine)
 {
        intel_engine_stop_cs(engine);
-       return i915_gem_find_active_request(engine);
 }
 
-static void skip_request(struct i915_request *rq)
+static void reset_ring(struct intel_engine_cs *engine, bool stalled)
 {
-       void *vaddr = rq->ring->vaddr;
+       struct i915_timeline *tl = &engine->timeline;
+       struct i915_request *pos, *rq;
+       unsigned long flags;
        u32 head;
 
-       head = rq->infix;
-       if (rq->postfix < head) {
-               memset32(vaddr + head, MI_NOOP,
-                        (rq->ring->size - head) / sizeof(u32));
-               head = 0;
+       rq = NULL;
+       spin_lock_irqsave(&tl->lock, flags);
+       list_for_each_entry(pos, &tl->requests, link) {
+               if (!__i915_request_completed(pos, pos->global_seqno)) {
+                       rq = pos;
+                       break;
+               }
        }
-       memset32(vaddr + head, MI_NOOP, (rq->postfix - head) / sizeof(u32));
-}
-
-static void reset_ring(struct intel_engine_cs *engine, struct i915_request *rq)
-{
-       GEM_TRACE("%s request global=%d, current=%d\n",
-                 engine->name, rq ? rq->global_seqno : 0,
-                 intel_engine_get_seqno(engine));
 
+       GEM_TRACE("%s seqno=%d, current=%d, stalled? %s\n",
+                 engine->name,
+                 rq ? rq->global_seqno : 0,
+                 intel_engine_get_seqno(engine),
+                 yesno(stalled));
        /*
-        * Try to restore the logical GPU state to match the continuation
-        * of the request queue. If we skip the context/PD restore, then
-        * the next request may try to execute assuming that its context
-        * is valid and loaded on the GPU and so may try to access invalid
-        * memory, prompting repeated GPU hangs.
+        * The guilty request will get skipped on a hung engine.
         *
-        * If the request was guilty, we still restore the logical state
-        * in case the next request requires it (e.g. the aliasing ppgtt),
-        * but skip over the hung batch.
+        * Users of client default contexts do not rely on logical
+        * state preserved between batches so it is safe to execute
+        * queued requests following the hang. Non default contexts
+        * rely on preserved state, so skipping a batch loses the
+        * evolution of the state and it needs to be considered corrupted.
+        * Executing more queued batches on top of corrupted state is
+        * risky. But we take the risk by trying to advance through
+        * the queued requests in order to make the client behaviour
+        * more predictable around resets, by not throwing away a random
+        * number of batches it has prepared for execution. Sophisticated
+        * clients can use gem_reset_stats_ioctl and dma fence status
+        * (exported via sync_file info ioctl on explicit fences) to observe
+        * when it loses the context state and should rebuild accordingly.
         *
-        * If the request was innocent, we try to replay the request with
-        * the restored context.
+        * The context ban, and ultimately the client ban, mechanism are safety
+        * valves if client submission ends up resulting in nothing more than
+        * subsequent hangs.
         */
+
        if (rq) {
-               /* If the rq hung, jump to its breadcrumb and skip the batch */
-               rq->ring->head = intel_ring_wrap(rq->ring, rq->head);
-               if (rq->fence.error == -EIO)
-                       skip_request(rq);
+               /*
+                * Try to restore the logical GPU state to match the
+                * continuation of the request queue. If we skip the
+                * context/PD restore, then the next request may try to execute
+                * assuming that its context is valid and loaded on the GPU and
+                * so may try to access invalid memory, prompting repeated GPU
+                * hangs.
+                *
+                * If the request was guilty, we still restore the logical
+                * state in case the next request requires it (e.g. the
+                * aliasing ppgtt), but skip over the hung batch.
+                *
+                * If the request was innocent, we try to replay the request
+                * with the restored context.
+                */
+               i915_reset_request(rq, stalled);
+
+               GEM_BUG_ON(rq->ring != engine->buffer);
+               head = rq->head;
+       } else {
+               head = engine->buffer->tail;
        }
+       engine->buffer->head = intel_ring_wrap(engine->buffer, head);
+
+       spin_unlock_irqrestore(&tl->lock, flags);
 }
 
 static void reset_finish(struct intel_engine_cs *engine)
 
 struct intel_engine_hangcheck {
        u64 acthd;
        u32 seqno;
-       enum intel_engine_hangcheck_action action;
        unsigned long action_timestamp;
-       int deadlock;
        struct intel_instdone instdone;
-       struct i915_request *active_request;
-       bool stalled:1;
-       bool wedged:1;
 };
 
 struct intel_ring {
        int             (*init_hw)(struct intel_engine_cs *engine);
 
        struct {
-               struct i915_request *(*prepare)(struct intel_engine_cs *engine);
-               void (*reset)(struct intel_engine_cs *engine,
-                             struct i915_request *rq);
+               void (*prepare)(struct intel_engine_cs *engine);
+               void (*reset)(struct intel_engine_cs *engine, bool stalled);
                void (*finish)(struct intel_engine_cs *engine);
        } reset;
 
        return cs;
 }
 
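+/* Ask the backend to reset the engine; @stalled indicates it holds the hung request */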
+static inline void intel_engine_reset(struct intel_engine_cs *engine,
+                                     bool stalled)
+{
+       if (engine->reset.reset)
+               engine->reset.reset(engine, stalled);
+}
+
 void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 
        /* Check that we can issue a global GPU reset */
 
        igt_global_reset_lock(i915);
-       set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
 
-       mutex_lock(&i915->drm.struct_mutex);
        reset_count = i915_reset_count(&i915->gpu_error);
 
        i915_reset(i915, ALL_ENGINES, NULL);
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
        }
-       mutex_unlock(&i915->drm.struct_mutex);
 
-       GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
        igt_global_reset_unlock(i915);
 
        if (i915_terminally_wedged(&i915->gpu_error))
        i915_gem_set_wedged(i915);
        GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
 
-       set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
        i915_reset(i915, ALL_ENGINES, NULL);
-       GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
 
        intel_runtime_pm_put(i915, wakeref);
        mutex_unlock(&i915->drm.struct_mutex);
                                break;
                        }
 
-                       if (!wait_for_idle(engine)) {
+                       if (!i915_reset_flush(i915)) {
                                struct drm_printer p =
                                        drm_info_printer(i915->drm.dev);
 
        return 0;
 }
 
-static u32 fake_hangcheck(struct i915_request *rq, u32 mask)
+static u32 fake_hangcheck(struct drm_i915_private *i915, u32 mask)
 {
-       struct i915_gpu_error *error = &rq->i915->gpu_error;
-       u32 reset_count = i915_reset_count(error);
-
-       error->stalled_mask = mask;
-
-       /* set_bit() must be after we have setup the backchannel (mask) */
-       smp_mb__before_atomic();
-       set_bit(I915_RESET_HANDOFF, &error->flags);
+       u32 count = i915_reset_count(&i915->gpu_error);
 
-       wake_up_all(&error->wait_queue);
+       i915_reset(i915, mask, NULL);
 
-       return reset_count;
+       return count;
 }
 
 static int igt_reset_wait(void *arg)
                goto out_rq;
        }
 
-       reset_count = fake_hangcheck(rq, ALL_ENGINES);
+       reset_count = fake_hangcheck(i915, ALL_ENGINES);
 
        timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10);
        if (timeout < 0) {
                goto out_rq;
        }
 
-       GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags));
        if (i915_reset_count(&i915->gpu_error) == reset_count) {
                pr_err("No GPU reset recorded!\n");
                err = -EINVAL;
        }
 
 out_reset:
-       fake_hangcheck(rq, intel_engine_flag(rq->engine));
+       fake_hangcheck(rq->i915, intel_engine_flag(rq->engine));
 
        if (tsk) {
                struct igt_wedge_me w;
                                goto fini;
                        }
 
-                       reset_count = fake_hangcheck(prev, ENGINE_MASK(id));
-
-                       i915_reset(i915, ENGINE_MASK(id), NULL);
-
-                       GEM_BUG_ON(test_bit(I915_RESET_HANDOFF,
-                                           &i915->gpu_error.flags));
+                       reset_count = fake_hangcheck(i915, ENGINE_MASK(id));
 
                        if (prev->fence.error != -EIO) {
                                pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n",
                pr_err("%s(%s): Failed to start request %llx, at %x\n",
                       __func__, engine->name,
                       rq->fence.seqno, hws_seqno(&h, rq));
+               i915_gem_set_wedged(i915);
                err = -EIO;
        }
 
 static void force_reset(struct drm_i915_private *i915)
 {
        i915_gem_set_wedged(i915);
-       set_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags);
        i915_reset(i915, 0, NULL);
 }
 
        if (i915_terminally_wedged(&i915->gpu_error))
                goto unlock;
 
+       if (intel_has_gpu_reset(i915)) {
+               const typeof(*phases) *p;
+
+               for (p = phases; p->name; p++) {
+                       GEM_TRACE("intel_gpu_reset under %s\n", p->name);
+
+                       p->critical_section_begin();
+                       err = intel_gpu_reset(i915, ALL_ENGINES);
+                       p->critical_section_end();
+
+                       if (err) {
+                               pr_err("intel_gpu_reset failed under %s\n",
+                                      p->name);
+                               goto out;
+                       }
+               }
+
+               force_reset(i915);
+       }
+
        if (intel_has_reset_engine(i915)) {
                struct intel_engine_cs *engine;
                enum intel_engine_id id;
 
 
 static int do_device_reset(struct intel_engine_cs *engine)
 {
-       set_bit(I915_RESET_HANDOFF, &engine->i915->gpu_error.flags);
        i915_reset(engine->i915, ENGINE_MASK(engine->id), "live_workarounds");
        return 0;
 }
 live_gpu_reset_gt_engine_workarounds(void *arg)
 {
        struct drm_i915_private *i915 = arg;
-       struct i915_gpu_error *error = &i915->gpu_error;
        intel_wakeref_t wakeref;
        struct wa_lists lists;
        bool ok;
        if (!ok)
                goto out;
 
-       set_bit(I915_RESET_HANDOFF, &error->flags);
        i915_reset(i915, ALL_ENGINES, "live_workarounds");
 
        ok = verify_gt_engine_wa(i915, &lists, "after reset");
 
        i915_gem_contexts_lost(i915);
        mutex_unlock(&i915->drm.struct_mutex);
 
-       cancel_delayed_work_sync(&i915->gt.retire_work);
-       cancel_delayed_work_sync(&i915->gt.idle_work);
+       drain_delayed_work(&i915->gt.retire_work);
+       drain_delayed_work(&i915->gt.idle_work);
        i915_gem_drain_workqueue(i915);
 
        mutex_lock(&i915->drm.struct_mutex);