return rq->engine->emit_flush(rq, EMIT_FLUSH);
 }
 
-static inline int mi_set_context(struct i915_request *rq, u32 flags)
+static inline int mi_set_context(struct i915_request *rq,
+                                struct intel_context *ce,
+                                u32 flags)
 {
        struct drm_i915_private *i915 = rq->i915;
        struct intel_engine_cs *engine = rq->engine;
 
        *cs++ = MI_NOOP;
        *cs++ = MI_SET_CONTEXT;
-       *cs++ = i915_ggtt_offset(rq->context->state) | flags;
+       *cs++ = i915_ggtt_offset(ce->state) | flags;
        /*
         * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
         * WaMiSetContext_Hang:snb,ivb,vlv
        return rq->engine->emit_flush(rq, EMIT_INVALIDATE);
 }
 
+/*
+ * Run the engine's context-switch wa_bb from the kernel context so that
+ * residual state from the previous user context can be cleared before
+ * the next context is loaded.
+ */
+static int clear_residuals(struct i915_request *rq)
+{
+       struct intel_engine_cs *engine = rq->engine;
+       int ret;
+
+       ret = switch_mm(rq, vm_alias(engine->kernel_context->vm));
+       if (ret)
+               return ret;
+
+       if (engine->kernel_context->state) {
+               ret = mi_set_context(rq,
+                                    engine->kernel_context,
+                                    MI_MM_SPACE_GTT | MI_RESTORE_INHIBIT);
+               if (ret)
+                       return ret;
+       }
+
+       ret = engine->emit_bb_start(rq,
+                                   engine->wa_ctx.vma->node.start, 0,
+                                   0);
+       if (ret)
+               return ret;
+
+       ret = engine->emit_flush(rq, EMIT_FLUSH);
+       if (ret)
+               return ret;
+
+       /* Always invalidate before the next switch_mm() */
+       return engine->emit_flush(rq, EMIT_INVALIDATE);
+}
+
 static int switch_context(struct i915_request *rq)
 {
+       struct intel_engine_cs *engine = rq->engine;
        struct intel_context *ce = rq->context;
+       void **residuals = NULL;
        int ret;
 
        GEM_BUG_ON(HAS_EXECLISTS(rq->i915));
 
+       /*
+        * The context-switch wa_bb only needs to run when the engine is
+        * handed to a different user context than the one that last ran
+        * on it (tracked in wa_ctx.vma->private); the kernel context is
+        * exempt.
+        */
+       if (engine->wa_ctx.vma && ce != engine->kernel_context) {
+               if (engine->wa_ctx.vma->private != ce) {
+                       ret = clear_residuals(rq);
+                       if (ret)
+                               return ret;
+
+                       residuals = &engine->wa_ctx.vma->private;
+               }
+       }
+
        ret = switch_mm(rq, vm_alias(ce->vm));
        if (ret)
                return ret;
        if (ce->state) {
                u32 flags;
 
-               GEM_BUG_ON(rq->engine->id != RCS0);
+               GEM_BUG_ON(engine->id != RCS0);
 
                /* For resource streamer on HSW+ and power context elsewhere */
                BUILD_BUG_ON(HSW_MI_RS_SAVE_STATE_EN != MI_SAVE_EXT_STATE_EN);
                else
                        flags |= MI_RESTORE_INHIBIT;
 
-               ret = mi_set_context(rq, flags);
+               ret = mi_set_context(rq, ce, flags);
                if (ret)
                        return ret;
        }
        if (ret)
                return ret;
 
+       /*
+        * Now past the point of no return, this request _will_ be emitted.
+        *
+        * Or at least this preamble will be emitted, the request may be
+        * interrupted prior to submitting the user payload. If so, we
+        * still submit the "empty" request in order to preserve global
+        * state tracking such as this, our tracking of the current
+        * dirty context.
+        */
+       if (residuals) {
+               intel_context_put(*residuals);
+               *residuals = intel_context_get(ce);
+       }
+
        return 0;
 }
 
 
        intel_engine_cleanup_common(engine);
 
+       if (engine->wa_ctx.vma) {
+               intel_context_put(engine->wa_ctx.vma->private);
+               i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0);
+       }
+
        intel_ring_unpin(engine->legacy.ring);
        intel_ring_put(engine->legacy.ring);
 
        engine->emit_fini_breadcrumb = gen7_xcs_emit_breadcrumb;
 }
 
+static int gen7_ctx_switch_bb_setup(struct intel_engine_cs * const engine,
+                                   struct i915_vma * const vma)
+{
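+       /*
+        * No context-switch batch is emitted yet; returning 0 tells
+        * gen7_ctx_switch_bb_init() that no wa_bb object is required
+        * for this engine.
+        */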
+       return 0;
+}
+
+static int gen7_ctx_switch_bb_init(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       int size;
+       int err;
+
+       size = gen7_ctx_switch_bb_setup(engine, NULL /* probe size */);
+       if (size <= 0)
+               return size;
+
+       size = ALIGN(size, PAGE_SIZE);
+       obj = i915_gem_object_create_internal(engine->i915, size);
+       if (IS_ERR(obj))
+               return PTR_ERR(obj);
+
+       vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+       if (IS_ERR(vma)) {
+               err = PTR_ERR(vma);
+               goto err_obj;
+       }
+
+       vma->private = intel_context_create(engine); /* dummy residuals */
+       if (IS_ERR(vma->private)) {
+               err = PTR_ERR(vma->private);
+               goto err_obj;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+       if (err)
+               goto err_private;
+
+       err = gen7_ctx_switch_bb_setup(engine, vma);
+       if (err)
+               goto err_unpin;
+
+       engine->wa_ctx.vma = vma;
+       return 0;
+
+err_unpin:
+       i915_vma_unpin(vma);
+err_private:
+       intel_context_put(vma->private);
+err_obj:
+       i915_gem_object_put(obj);
+       return err;
+}
+
 int intel_ring_submission_setup(struct intel_engine_cs *engine)
 {
        struct intel_timeline *timeline;
 
        GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma);
 
+       if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) {
+               err = gen7_ctx_switch_bb_init(engine);
+               if (err)
+                       goto err_ring_unpin;
+       }
+
        /* Finally, take ownership and responsibility for cleanup! */
        engine->release = ring_release;
 
        return 0;
 
+err_ring_unpin:
+       intel_ring_unpin(ring);
 err_ring:
        intel_ring_put(ring);
 err_timeline_unpin:
        intel_engine_cleanup_common(engine);
        return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftest_ring_submission.c"
+#endif
 
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
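+
+/*
+ * Build a small batch that stores STACK_MAGIC into its own backing page
+ * at byte offset 4000. Installing it in place of the engine's wa_bb lets
+ * the test observe exactly when the context-switch batch is executed.
+ */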
+
+static struct i915_vma *create_wally(struct intel_engine_cs *engine)
+{
+       struct drm_i915_gem_object *obj;
+       struct i915_vma *vma;
+       u32 *cs;
+       int err;
+
+       obj = i915_gem_object_create_internal(engine->i915, 4096);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
+
+       vma = i915_vma_instance(obj, engine->gt->vm, NULL);
+       if (IS_ERR(vma)) {
+               i915_gem_object_put(obj);
+               return vma;
+       }
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+       if (err) {
+               i915_gem_object_put(obj);
+               return ERR_PTR(err);
+       }
+
+       cs = i915_gem_object_pin_map(obj, I915_MAP_WC);
+       if (IS_ERR(cs)) {
+               i915_gem_object_put(obj);
+               return ERR_CAST(cs);
+       }
+
+       /* Store STACK_MAGIC into this object at byte offset 4000 (dword 1000) */
+       if (INTEL_GEN(engine->i915) >= 6) {
+               *cs++ = MI_STORE_DWORD_IMM_GEN4;
+               *cs++ = 0;
+       } else if (INTEL_GEN(engine->i915) >= 4) {
+               *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *cs++ = 0;
+       } else {
+               *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+       }
+       *cs++ = vma->node.start + 4000;
+       *cs++ = STACK_MAGIC;
+
+       *cs++ = MI_BATCH_BUFFER_END;
+
+       /* Flush the CPU writes before the GPU samples the batch */
+       i915_gem_object_flush_map(obj);
+       i915_gem_object_unpin_map(obj);
+
+       vma->private = intel_context_create(engine); /* dummy residuals */
+       if (IS_ERR(vma->private)) {
+               vma = ERR_CAST(vma->private);
+               i915_gem_object_put(obj);
+       }
+
+       return vma;
+}
+
+static int context_sync(struct intel_context *ce)
+{
+       struct i915_request *rq;
+       int err = 0;
+
+       rq = intel_context_create_request(ce);
+       if (IS_ERR(rq))
+               return PTR_ERR(rq);
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+
+       if (i915_request_wait(rq, 0, HZ / 5) < 0)
+               err = -ETIME;
+       i915_request_put(rq);
+
+       return err;
+}
+
+static int new_context_sync(struct intel_engine_cs *engine)
+{
+       struct intel_context *ce;
+       int err;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       err = context_sync(ce);
+       intel_context_put(ce);
+
+       return err;
+}
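+
+/*
+ * Three checks per pass: switching to the kernel context must not run the
+ * wa_bb, but each switch to a fresh user context (whether coming from the
+ * kernel context or from another user context) must.
+ */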
+
+static int mixed_contexts_sync(struct intel_engine_cs *engine, u32 *result)
+{
+       int pass;
+       int err;
+
+       for (pass = 0; pass < 2; pass++) {
+               WRITE_ONCE(*result, 0);
+               err = context_sync(engine->kernel_context);
+               if (err || READ_ONCE(*result)) {
+                       if (!err) {
+                               pr_err("pass[%d] wa_bb emitted for the kernel context\n",
+                                      pass);
+                               err = -EINVAL;
+                       }
+                       return err;
+               }
+
+               WRITE_ONCE(*result, 0);
+               err = new_context_sync(engine);
+               if (READ_ONCE(*result) != STACK_MAGIC) {
+                       if (!err) {
+                               pr_err("pass[%d] wa_bb *NOT* emitted after the kernel context\n",
+                                      pass);
+                               err = -EINVAL;
+                       }
+                       return err;
+               }
+
+               WRITE_ONCE(*result, 0);
+               err = new_context_sync(engine);
+               if (READ_ONCE(*result) != STACK_MAGIC) {
+                       if (!err) {
+                               pr_err("pass[%d] wa_bb *NOT* emitted for the user context switch\n",
+                                      pass);
+                               err = -EINVAL;
+                       }
+                       return err;
+               }
+       }
+
+       return 0;
+}
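+
+/* Submitting the same user context twice in a row must not run the wa_bb. */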
+
+static int double_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+       struct intel_context *ce;
+       int err, i;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       for (i = 0; i < 2; i++) {
+               WRITE_ONCE(*result, 0);
+               err = context_sync(ce);
+               if (err)
+                       break;
+       }
+       intel_context_put(ce);
+       if (err)
+               return err;
+
+       if (READ_ONCE(*result)) {
+               pr_err("wa_bb emitted between the same user context\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
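+
+/*
+ * A kernel-context submission in between two runs of the same user context
+ * must not force the wa_bb either; only a new user context counts.
+ */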
+
+static int kernel_context_sync_00(struct intel_engine_cs *engine, u32 *result)
+{
+       struct intel_context *ce;
+       int err, i;
+
+       ce = intel_context_create(engine);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
+
+       for (i = 0; i < 2; i++) {
+               WRITE_ONCE(*result, 0);
+               err = context_sync(ce);
+               if (err)
+                       break;
+
+               err = context_sync(engine->kernel_context);
+               if (err)
+                       break;
+       }
+       intel_context_put(ce);
+       if (err)
+               return err;
+
+       if (READ_ONCE(*result)) {
+               pr_err("wa_bb emitted between the same user context [with intervening kernel]\n");
+               return -EINVAL;
+       }
+
+       return 0;
+}
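+
+/*
+ * Temporarily install the marker batch from create_wally() as the engine's
+ * wa_bb and run the context-switch scenarios against it.
+ */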
+
+static int __live_ctx_switch_wa(struct intel_engine_cs *engine)
+{
+       struct i915_vma *bb;
+       u32 *result;
+       int err;
+
+       bb = create_wally(engine);
+       if (IS_ERR(bb))
+               return PTR_ERR(bb);
+
+       result = i915_gem_object_pin_map(bb->obj, I915_MAP_WC);
+       if (IS_ERR(result)) {
+               intel_context_put(bb->private);
+               i915_vma_unpin_and_release(&bb, 0);
+               return PTR_ERR(result);
+       }
+       result += 1000; /* dword 1000 == byte 4000, the marker written by create_wally() */
+
+       engine->wa_ctx.vma = bb;
+
+       err = mixed_contexts_sync(engine, result);
+       if (err)
+               goto out;
+
+       err = double_context_sync_00(engine, result);
+       if (err)
+               goto out;
+
+       err = kernel_context_sync_00(engine, result);
+       if (err)
+               goto out;
+
+out:
+       intel_context_put(engine->wa_ctx.vma->private);
+       i915_vma_unpin_and_release(&engine->wa_ctx.vma, I915_VMA_RELEASE_MAP);
+       return err;
+}
+
+static int live_ctx_switch_wa(void *arg)
+{
+       struct intel_gt *gt = arg;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       /*
+        * Exercise the inter-context wa batch.
+        *
+        * Between consecutive user contexts we run a wa batch, and since it may
+        * have implications for user visible state, we have to check that
+        * we do actually execute it.
+        *
+        * The trick we use is to replace the normal wa batch with a custom
+        * one that writes to a marker within it, and we can then look for
+        * that marker to confirm if the batch was run when we expect it,
+        * and, equally important, that it was not run when we don't!
+        */
+
+       for_each_engine(engine, gt, id) {
+               struct i915_vma *saved_wa;
+               int err;
+
+               if (!intel_engine_can_store_dword(engine))
+                       continue;
+
+               if (IS_GEN_RANGE(gt->i915, 4, 5))
+                       continue; /* MI_STORE_DWORD is privileged! */
+
+               saved_wa = fetch_and_zero(&engine->wa_ctx.vma);
+
+               intel_engine_pm_get(engine);
+               err = __live_ctx_switch_wa(engine);
+               intel_engine_pm_put(engine);
+               if (igt_flush_test(gt->i915))
+                       err = -EIO;
+
+               engine->wa_ctx.vma = saved_wa;
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+int intel_ring_submission_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(live_ctx_switch_wa),
+       };
+
+       if (HAS_EXECLISTS(i915))
+               return 0;
+
+       return intel_gt_live_subtests(tests, &i915->gt);
+}