drm/i915: Apply an execution_mask to the virtual_engine

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 21 May 2019 21:11:31 +0000 (22:11 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Wed, 22 May 2019 07:40:43 +0000 (08:40 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 21 May 2019 21:11:31 +0000 (22:11 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 22 May 2019 07:40:43 +0000 (08:40 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index affa5e2dfce1d8597c33462d703134b627f0bd82..0b4d29d4816bfcb4b66604e9635602aa6397b03e 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -509,6 +509,15 @@ execlists_context_schedule_in(struct i915_request *rq)
         rq->hw_context->active = rq->engine;
  }
  
+static void kick_siblings(struct i915_request *rq)
+{
+       struct virtual_engine *ve = to_virtual_engine(rq->hw_context->engine);
+       struct i915_request *next = READ_ONCE(ve->request); /* may be NULL (checked below) */
+
+       if (next && next->execution_mask & ~rq->execution_mask) /* kick only if next allows an engine that rq does not */
+               tasklet_schedule(&ve->base.execlists.tasklet); /* schedule the virtual engine's execlists tasklet */
+}
+
  static inline void
  execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
  {
@@ -516,6 +525,18 @@ execlists_context_schedule_out(struct i915_request *rq, unsigned long status)
         intel_engine_context_out(rq->engine);
         execlists_context_status_change(rq, status);
         trace_i915_request_out(rq);
+
+       /*
+        * If this is part of a virtual engine, its next request may have
+        * been blocked waiting for access to the active context. We have
+        * to kick all the siblings again in case we need to switch (e.g.
+        * the next request is not runnable on this engine). Hopefully,
+        * we will already have submitted the next request before the
+        * tasklet runs and do not need to rebuild each virtual tree
+        * and kick everyone again.
+        */
+       if (rq->engine != rq->hw_context->engine)
+               kick_siblings(rq);
  }
  
  static u64 execlists_update_context(struct i915_request *rq)
@@ -745,6 +766,9 @@ static bool virtual_matches(const struct virtual_engine *ve,
  {
         const struct intel_engine_cs *active;
  
+       if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */
+               return false;
+
         /*
          * We track when the HW has completed saving the context image
          * (i.e. when we have seen the final CS event switching out of
@@ -3118,12 +3142,44 @@ static const struct intel_context_ops virtual_context_ops = {
         .destroy = virtual_context_destroy,
  };
  
+static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
+{
+       struct i915_request *rq;
+       intel_engine_mask_t mask;
+
+       rq = READ_ONCE(ve->request);
+       if (!rq)
+               return 0; /* no pending request: report an empty mask */
+
+       /* The rq is ready for submission; rq->execution_mask is now stable. */
+       mask = rq->execution_mask;
+       if (unlikely(!mask)) {
+               /* Invalid (empty) selection: skip rq with -ENODEV, still submit it */
+               i915_request_skip(rq, -ENODEV);
+               mask = ve->siblings[0]->mask; /* fall back to the first sibling */
+       }
+
+       GEM_TRACE("%s: rq=%llx:%lld, mask=%x, prio=%d\n",
+                 ve->base.name,
+                 rq->fence.context, rq->fence.seqno,
+                 mask, ve->base.execlists.queue_priority_hint);
+
+       return mask;
+}
+
  static void virtual_submission_tasklet(unsigned long data)
  {
         struct virtual_engine * const ve = (struct virtual_engine *)data;
         const int prio = ve->base.execlists.queue_priority_hint;
+       intel_engine_mask_t mask;
         unsigned int n;
  
+       rcu_read_lock();
+       mask = virtual_submission_mask(ve);
+       rcu_read_unlock();
+       if (unlikely(!mask))
+               return;
+
         local_irq_disable();
         for (n = 0; READ_ONCE(ve->request) && n < ve->num_siblings; n++) {
                 struct intel_engine_cs *sibling = ve->siblings[n];
@@ -3131,6 +3187,17 @@ static void virtual_submission_tasklet(unsigned long data)
                 struct rb_node **parent, *rb;
                 bool first;
  
+               if (unlikely(!(mask & sibling->mask))) {
+                       if (!RB_EMPTY_NODE(&node->rb)) {
+                               spin_lock(&sibling->timeline.lock);
+                               rb_erase_cached(&node->rb,
+                                               &sibling->execlists.virtual);
+                               RB_CLEAR_NODE(&node->rb);
+                               spin_unlock(&sibling->timeline.lock);
+                       }
+                       continue;
+               }
+
                 spin_lock(&sibling->timeline.lock);
  
                 if (!RB_EMPTY_NODE(&node->rb)) {
diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c

index f880271fb9bad47f1f9ef2bea5d3eea0fcae2184..61637f525690abf5077fdecc82b6d955c6968c30 100644 (file)
--- a/drivers/gpu/drm/i915/gt/selftest_lrc.c
+++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c
@@ -1489,6 +1489,136 @@ out_unlock:
         return err;
  }
  
+static int mask_virtual_engine(struct drm_i915_private *i915,
+                              struct intel_engine_cs **siblings,
+                              unsigned int nsibling)
+{
+       struct i915_request *request[MAX_ENGINE_INSTANCE + 1];
+       struct i915_gem_context *ctx;
+       struct intel_context *ve;
+       struct igt_live_test t;
+       unsigned int n;
+       int err;
+
+       /*
+        * Check that by setting the execution mask on a request, we can
+        * restrict it to our desired engine within the virtual engine.
+        */
+
+       ctx = kernel_context(i915);
+       if (!ctx)
+               return -ENOMEM;
+
+       ve = intel_execlists_create_virtual(ctx, siblings, nsibling);
+       if (IS_ERR(ve)) {
+               err = PTR_ERR(ve);
+               goto out_close;
+       }
+
+       err = intel_context_pin(ve);
+       if (err)
+               goto out_put;
+
+       err = igt_live_test_begin(&t, i915, __func__, ve->engine->name);
+       if (err)
+               goto out_unpin;
+
+       for (n = 0; n < nsibling; n++) {
+               request[n] = i915_request_create(ve);
+               if (IS_ERR(request[n])) {
+                       err = PTR_ERR(request[n]);
+                       nsibling = n; /* only request[0..n-1] hold a reference to put */
+                       goto out;
+               }
+
+               /* Reverse order as it's more likely to be unnatural */
+               request[n]->execution_mask = siblings[nsibling - n - 1]->mask;
+
+               i915_request_get(request[n]); /* keep rq alive for the checks below */
+               i915_request_add(request[n]);
+       }
+
+       for (n = 0; n < nsibling; n++) {
+               if (i915_request_wait(request[n], I915_WAIT_LOCKED, HZ / 10) < 0) {
+                       pr_err("%s(%s): wait for %llx:%lld timed out\n",
+                              __func__, ve->engine->name,
+                              request[n]->fence.context,
+                              request[n]->fence.seqno);
+
+                       GEM_TRACE("%s(%s) failed at request %llx:%lld\n",
+                                 __func__, ve->engine->name,
+                                 request[n]->fence.context,
+                                 request[n]->fence.seqno);
+                       GEM_TRACE_DUMP();
+                       i915_gem_set_wedged(i915);
+                       err = -EIO;
+                       goto out;
+               }
+
+               if (request[n]->engine != siblings[nsibling - n - 1]) { /* must match the engine selected by its mask */
+                       pr_err("Executed on wrong sibling '%s', expected '%s'\n",
+                              request[n]->engine->name,
+                              siblings[nsibling - n - 1]->name);
+                       err = -EINVAL;
+                       goto out;
+               }
+       }
+
+       err = igt_live_test_end(&t);
+       if (err)
+               goto out;
+
+out:
+       if (igt_flush_test(i915, I915_WAIT_LOCKED))
+               err = -EIO;
+
+       for (n = 0; n < nsibling; n++) /* nsibling was trimmed if creation failed part-way */
+               i915_request_put(request[n]);
+
+out_unpin:
+       intel_context_unpin(ve);
+out_put:
+       intel_context_put(ve);
+out_close:
+       kernel_context_close(ctx);
+       return err;
+}
+
+static int live_virtual_mask(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1];
+       unsigned int class, inst;
+       int err = 0;
+
+       if (USES_GUC_SUBMISSION(i915)) /* not exercised under GuC submission; report success */
+               return 0;
+
+       mutex_lock(&i915->drm.struct_mutex);
+
+       for (class = 0; class <= MAX_ENGINE_CLASS; class++) { /* run the mask test once per engine class */
+               unsigned int nsibling;
+
+               nsibling = 0;
+               for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) {
+                       if (!i915->engine_class[class][inst])
+                               break; /* stop at the first missing instance */
+
+                       siblings[nsibling++] = i915->engine_class[class][inst];
+               }
+               if (nsibling < 2) /* fewer than two engines collected for this class: skip it */
+                       continue;
+
+               err = mask_virtual_engine(i915, siblings, nsibling);
+               if (err) /* abort on the first failing class */
+                       goto out_unlock;
+       }
+
+out_unlock:
+       mutex_unlock(&i915->drm.struct_mutex);
+       return err;
+}
+
  int intel_execlists_live_selftests(struct drm_i915_private *i915)
  {
         static const struct i915_subtest tests[] = {
@@ -1502,6 +1632,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915)
                 SUBTEST(live_preempt_hang),
                 SUBTEST(live_preempt_smoke),
                 SUBTEST(live_virtual_engine),
+               SUBTEST(live_virtual_mask),
         };
  
         if (!HAS_EXECLISTS(i915))
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 2fca0b59578deedfc22f650ae60212a2e5a10a50..809d6ee10da6633c1701575d5bfa74f50a5bb6be 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -694,6 +694,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp)
         rq->batch = NULL;
         rq->capture_list = NULL;
         rq->waitboost = false;
+       rq->execution_mask = ALL_ENGINES;
  
         INIT_LIST_HEAD(&rq->active_list);
         INIT_LIST_HEAD(&rq->execute_cb);
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index 8025a89b599916a4d3f23a34c7c5ee3ae645ba64..d7f9b2194568be8d6143cc1f5f528212108af954 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,8 @@
  #include <linux/dma-fence.h>
  #include <linux/lockdep.h>
  
+#include "gt/intel_engine_types.h"
+
  #include "i915_gem.h"
  #include "i915_scheduler.h"
  #include "i915_selftest.h"
@@ -156,6 +158,7 @@ struct i915_request {
          */
         struct i915_sched_node sched;
         struct i915_dependency dep;
+       intel_engine_mask_t execution_mask;
  
         /*
          * A convenience pointer to the current breadcrumb value stored in
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 21 May 2019 21:11:31 +0000 (22:11 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Wed, 22 May 2019 07:40:43 +0000 (08:40 +0100)
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/selftest_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.h		patch \| blob \| history