!i915_request_completed(rq));
 
                        GEM_BUG_ON(i915_request_is_active(w));
-                       if (list_empty(&w->sched.link))
-                               continue; /* Not yet submitted; unready */
+                       if (!i915_request_is_ready(w))
+                               continue;
 
                        if (rq_prio(w) < rq_prio(rq))
                                continue;
        }
 }
 
+static void __execlists_hold(struct i915_request *rq)
+{
+       LIST_HEAD(list);
+
+       do {
+               struct i915_dependency *p;
+
+               if (i915_request_is_active(rq))
+                       __i915_request_unsubmit(rq);
+
+               RQ_TRACE(rq, "on hold\n");
+               clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+               list_move_tail(&rq->sched.link, &rq->engine->active.hold);
+               i915_request_set_hold(rq);
+
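+               /*
+                * Any waiters already submitted to this engine must be
+                * suspended as well, so that they cannot run ahead of the
+                * held request once it is pulled from the queue.
+                */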
+               list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+                       struct i915_request *w =
+                               container_of(p->waiter, typeof(*w), sched);
+
+                       /* Leave semaphores spinning on the other engines */
+                       if (w->engine != rq->engine)
+                               continue;
+
+                       if (!i915_request_is_ready(w))
+                               continue;
+
+                       if (i915_request_completed(w))
+                               continue;
+
+                       if (i915_request_on_hold(w))
+                               continue;
+
+                       list_move_tail(&w->sched.link, &list);
+               }
+
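+               /* Pick up the next suspended request, if any */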
+               rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+       } while (rq);
+}
+
+__maybe_unused
+static void execlists_hold(struct intel_engine_cs *engine,
+                          struct i915_request *rq)
+{
+       spin_lock_irq(&engine->active.lock);
+
+       /*
+        * Transfer this request onto the hold queue to prevent it
+        * being resubmitted to HW (and potentially completed) before we have
+        * released it. Since we may have already submitted following
+        * requests, we need to remove those as well.
+        */
+       GEM_BUG_ON(i915_request_on_hold(rq));
+       GEM_BUG_ON(rq->engine != engine);
+       __execlists_hold(rq);
+
+       spin_unlock_irq(&engine->active.lock);
+}
+
+static bool hold_request(const struct i915_request *rq)
+{
+       struct i915_dependency *p;
+
+       /*
+        * If one of our ancestors is on hold, we must also be on hold,
+        * otherwise we will bypass it and execute before it.
+        */
+       list_for_each_entry(p, &rq->sched.signalers_list, signal_link) {
+               const struct i915_request *s =
+                       container_of(p->signaler, typeof(*s), sched);
+
+               if (s->engine != rq->engine)
+                       continue;
+
+               if (i915_request_on_hold(s))
+                       return true;
+       }
+
+       return false;
+}
+
+static void __execlists_unhold(struct i915_request *rq)
+{
+       LIST_HEAD(list);
+
+       do {
+               struct i915_dependency *p;
+
+               GEM_BUG_ON(!i915_request_on_hold(rq));
+               GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
+
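+               /* Return the request to the priority queue for resubmission */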
+               i915_request_clear_hold(rq);
+               list_move_tail(&rq->sched.link,
+                              i915_sched_lookup_priolist(rq->engine,
+                                                         rq_prio(rq)));
+               set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
+               RQ_TRACE(rq, "hold release\n");
+
+               /* Also release any children on this engine that are ready */
+               list_for_each_entry(p, &rq->sched.waiters_list, wait_link) {
+                       struct i915_request *w =
+                               container_of(p->waiter, typeof(*w), sched);
+
+                       if (w->engine != rq->engine)
+                               continue;
+
+                       if (!i915_request_on_hold(w))
+                               continue;
+
+                       /* Check that no other parents are also on hold */
+                       if (hold_request(w))
+                               continue;
+
+                       list_move_tail(&w->sched.link, &list);
+               }
+
+               rq = list_first_entry_or_null(&list, typeof(*rq), sched.link);
+       } while (rq);
+}
+
+__maybe_unused
+static void execlists_unhold(struct intel_engine_cs *engine,
+                            struct i915_request *rq)
+{
+       spin_lock_irq(&engine->active.lock);
+
+       /*
+        * Move this request back to the priority queue, and all of its
+        * children and grandchildren that were suspended along with it.
+        */
+       __execlists_unhold(rq);
+
+       if (rq_prio(rq) > engine->execlists.queue_priority_hint) {
+               engine->execlists.queue_priority_hint = rq_prio(rq);
+               tasklet_hi_schedule(&engine->execlists.tasklet);
+       }
+
+       spin_unlock_irq(&engine->active.lock);
+}
+
 static noinline void preempt_reset(struct intel_engine_cs *engine)
 {
        const unsigned int bit = I915_RESET_ENGINE + engine->id;
        __submit_queue_imm(engine);
 }
 
+static bool ancestor_on_hold(const struct intel_engine_cs *engine,
+                            const struct i915_request *rq)
+{
+       GEM_BUG_ON(i915_request_on_hold(rq));
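+       /* Cheap check first: is anything at all on hold for this engine? */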
+       return !list_empty(&engine->active.hold) && hold_request(rq);
+}
+
 static void execlists_submit_request(struct i915_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
        /* Will be called from irq-context when using foreign fences. */
        spin_lock_irqsave(&engine->active.lock, flags);
 
-       queue_request(engine, request);
+       if (unlikely(ancestor_on_hold(engine, request))) {
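+               /* Trace the suspension, as __execlists_hold() does */
+               RQ_TRACE(request, "ancestor on hold\n");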
+               list_add_tail(&request->sched.link, &engine->active.hold);
+               i915_request_set_hold(request);
+       } else {
+               queue_request(engine, request);
 
-       GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
-       GEM_BUG_ON(list_empty(&request->sched.link));
+               GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root));
+               GEM_BUG_ON(list_empty(&request->sched.link));
 
-       submit_queue(engine, request);
+               submit_queue(engine, request);
+       }
 
        spin_unlock_irqrestore(&engine->active.lock, flags);
 }
                i915_priolist_free(p);
        }
 
+       /* On-hold requests will be flushed to timeline upon their release */
+       list_for_each_entry(rq, &engine->active.hold, sched.link)
+               mark_eio(rq);
+
        /* Cancel all attached virtual engines */
        while ((rb = rb_first_cached(&execlists->virtual))) {
                struct virtual_engine *ve =
 
        return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX));
 }
 
+static int live_hold_reset(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+       struct igt_spinner spin;
+       int err = 0;
+
+       /*
+        * In order to support offline error capture for fast preempt reset,
+        * we need to decouple the guilty request and ensure that it and its
+        * descendants are not executed while the capture is in progress.
+        */
+
+       if (!intel_has_reset_engine(gt))
+               return 0;
+
+       if (igt_spinner_init(&spin, gt))
+               return -ENOMEM;
+
+       for_each_engine(engine, gt, id) {
+               struct intel_context *ce;
+               unsigned long heartbeat;
+               struct i915_request *rq;
+
+               ce = intel_context_create(engine);
+               if (IS_ERR(ce)) {
+                       err = PTR_ERR(ce);
+                       break;
+               }
+
+               engine_heartbeat_disable(engine, &heartbeat);
+
+               rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
+               if (IS_ERR(rq)) {
+                       err = PTR_ERR(rq);
+                       goto out;
+               }
+               i915_request_add(rq);
+
+               if (!igt_wait_for_spinner(&spin, rq)) {
+                       intel_gt_set_wedged(gt);
+                       err = -ETIME;
+                       goto out;
+               }
+
+               /* We have our request executing, now remove it and reset */
+
+               if (test_and_set_bit(I915_RESET_ENGINE + id,
+                                    &gt->reset.flags)) {
+                       intel_gt_set_wedged(gt);
+                       err = -EBUSY;
+                       goto out;
+               }
+               tasklet_disable(&engine->execlists.tasklet);
+
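+               /* Run the submission tasklet by hand while it is disabled */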
+               engine->execlists.tasklet.func(engine->execlists.tasklet.data);
+               GEM_BUG_ON(execlists_active(&engine->execlists) != rq);
+
+               execlists_hold(engine, rq);
+               GEM_BUG_ON(!i915_request_on_hold(rq));
+
+               intel_engine_reset(engine, NULL);
+               GEM_BUG_ON(rq->fence.error != -EIO);
+
+               tasklet_enable(&engine->execlists.tasklet);
+               clear_and_wake_up_bit(I915_RESET_ENGINE + id,
+                                     &gt->reset.flags);
+
+               /* Check that we do not resubmit the held request */
+               i915_request_get(rq);
+               if (!i915_request_wait(rq, 0, HZ / 5)) {
+                       pr_err("%s: on hold request completed!\n",
+                              engine->name);
+                       i915_request_put(rq);
+                       err = -EIO;
+                       goto out;
+               }
+               GEM_BUG_ON(!i915_request_on_hold(rq));
+
+               /* But is resubmitted on release */
+               execlists_unhold(engine, rq);
+               if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+                       pr_err("%s: held request did not complete!\n",
+                              engine->name);
+                       intel_gt_set_wedged(gt);
+                       err = -ETIME;
+               }
+               i915_request_put(rq);
+
+out:
+               engine_heartbeat_enable(engine, heartbeat);
+               intel_context_put(ce);
+               if (err)
+                       break;
+       }
+
+       igt_spinner_fini(&spin);
+       return err;
+}
+
 static int
 emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx)
 {
                SUBTEST(live_sanitycheck),
                SUBTEST(live_unlite_switch),
                SUBTEST(live_unlite_preempt),
+               SUBTEST(live_hold_reset),
                SUBTEST(live_timeslice_preempt),
                SUBTEST(live_timeslice_queue),
                SUBTEST(live_busywait_preempt),
 
         */
        I915_FENCE_FLAG_SIGNAL,
 
+       /*
+        * I915_FENCE_FLAG_HOLD - this request is currently on hold
+        *
+        * This request has been suspended, pending an ongoing investigation.
+        */
+       I915_FENCE_FLAG_HOLD,
+
        /*
         * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted
         *
        return __i915_request_has_started(rq);
 }
 
+/**
+ * i915_request_is_ready - check if the request is ready for execution
+ * @rq: the request
+ *
+ * Upon construction, the request is instructed to wait upon various
+ * signals before it is ready to be executed by the HW. That is, we do
+ * not want to start execution and read data before it is written. In practice,
+ * this is controlled with a mixture of interrupts and semaphores. Once
+ * the submit fence is completed, the backend scheduler will place the
+ * request into its queue and from there submit it for execution. So we
+ * can detect when a request is eligible for execution (and is under control
+ * of the scheduler) by querying where it is in any of the scheduler's lists.
+ *
+ * Returns true if the request is ready for execution (it may be inflight),
+ * false otherwise.
+ */
+static inline bool i915_request_is_ready(const struct i915_request *rq)
+{
+       return !list_empty(&rq->sched.link);
+}
+
 static inline bool i915_request_completed(const struct i915_request *rq)
 {
        if (i915_request_signaled(rq))
        return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags));
 }
 
+static inline bool i915_request_on_hold(const struct i915_request *rq)
+{
+       return unlikely(test_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags));
+}
+
+static inline void i915_request_set_hold(struct i915_request *rq)
+{
+       set_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
+static inline void i915_request_clear_hold(struct i915_request *rq)
+{
+       clear_bit(I915_FENCE_FLAG_HOLD, &rq->fence.flags);
+}
+
 static inline struct intel_timeline *
 i915_request_timeline(struct i915_request *rq)
 {