drm/i915/tgl: Extend MI_SEMAPHORE_WAIT

author Chris Wilson <chris@chris-wilson.co.uk>

Tue, 17 Sep 2019 12:30:55 +0000 (13:30 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Tue, 17 Sep 2019 14:33:21 +0000 (15:33 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 17 Sep 2019 12:30:55 +0000 (13:30 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 17 Sep 2019 14:33:21 +0000 (15:33 +0100)
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h

index fbad403ab7ac022a800526a2b004ea8587364d3b..f78b13d74e17a1fedcdf62816aae936ba9d35bfd 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -112,6 +112,7 @@
  #define MI_SEMAPHORE_SIGNAL    MI_INSTR(0x1b, 0) /* GEN8+ */
  #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
  #define MI_SEMAPHORE_WAIT      MI_INSTR(0x1c, 2) /* GEN8+ */
+#define MI_SEMAPHORE_WAIT_TOKEN        MI_INSTR(0x1c, 3) /* GEN12+ */
  #define   MI_SEMAPHORE_POLL            (1 << 15)
  #define   MI_SEMAPHORE_SAD_GT_SDD      (0 << 12)
  #define   MI_SEMAPHORE_SAD_GTE_SDD     (1 << 12)
@@ -119,6 +120,8 @@
  #define   MI_SEMAPHORE_SAD_LTE_SDD     (3 << 12)
  #define   MI_SEMAPHORE_SAD_EQ_SDD      (4 << 12)
  #define   MI_SEMAPHORE_SAD_NEQ_SDD     (5 << 12)
+#define   MI_SEMAPHORE_TOKEN_MASK      REG_GENMASK(9, 5)
+#define   MI_SEMAPHORE_TOKEN_SHIFT     5
  #define MI_STORE_DWORD_IMM     MI_INSTR(0x20, 1)
  #define MI_STORE_DWORD_IMM_GEN4        MI_INSTR(0x20, 2)
  #define   MI_MEM_VIRTUAL       (1 << 22) /* 945,g33,965 */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c

index a3f0e499974402d46ade0e0e787ba464b06345e6..a99166a2d2eb9ebaeefb8345d20b66ed5cbc3d04 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2879,6 +2879,22 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
         return gen8_emit_fini_breadcrumb_footer(request, cs);
  }
  
+static u32 *
+gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
+{ /* Emits the request's closing breadcrumb at cs and returns the advanced pointer. */
+       cs = gen8_emit_ggtt_write_rcs(cs,
+                                     request->fence.seqno, /* value handed to the write helper */
+                                     request->timeline->hwsp_offset, /* offset handed to the write helper as its target */
+                                     PIPE_CONTROL_CS_STALL | /* flag bits below are OR'd into one argument */
+                                     PIPE_CONTROL_TILE_CACHE_FLUSH |
+                                     PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+                                     PIPE_CONTROL_DC_FLUSH_ENABLE |
+                                     PIPE_CONTROL_FLUSH_ENABLE);
+
+       return gen8_emit_fini_breadcrumb_footer(request, cs); /* gen11 keeps the gen8 footer; only the gen12 variants switch to the gen12 footer */
+}
+
  /*
   * Note that the CS instruction pre-parser will not stall on the breadcrumb
   * flush and will continue pre-fetching the instructions after it before the
@@ -2897,8 +2913,49 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
   * All the above applies only to the instructions themselves. Non-inline data
   * used by the instructions is not pre-fetched.
   */
-static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
-                                          u32 *cs)
+
+static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs)
+{ /* Writes six dwords at cs and returns the pointer just past them. */
+       *cs++ = MI_SEMAPHORE_WAIT_TOKEN | /* MI_INSTR(0x1c, 3) form, marked GEN12+ in intel_gpu_commands.h */
+               MI_SEMAPHORE_GLOBAL_GTT |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_EQ_SDD; /* compare-mode field value (4 << 12) */
+       *cs++ = 0; /* same slot as from->fence.seqno in emit_semaphore_wait() - presumably the compare value */
+       *cs++ = intel_hws_preempt_address(request->engine); /* same slot as hwsp_offset in emit_semaphore_wait() - presumably the polled address */
+       *cs++ = 0; /* same slot as the constant 0 that follows hwsp_offset in emit_semaphore_wait() */
+       *cs++ = 0; /* extra dword of the token form; emit_semaphore_wait() also writes 0 here on gen12 */
+       *cs++ = MI_NOOP; /* sixth dword; NOTE(review): presumably padding to an even dword count - confirm */
+
+       return cs;
+}
+
+static __always_inline u32*
+gen12_emit_fini_breadcrumb_footer(struct i915_request *request, u32 *cs)
+{ /* Shared tail of both gen12 breadcrumb emitters: two command dwords, an optional busywait, then request->tail is recorded. */
+       *cs++ = MI_USER_INTERRUPT;
+
+       *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+       if (intel_engine_has_semaphores(request->engine)) /* busywait is skipped for engines that do not report semaphores */
+               cs = gen12_emit_preempt_busywait(request, cs); /* appends six more dwords */
+
+       request->tail = intel_ring_offset(request, cs); /* tail is taken here, before the gen8_emit_wa_tail() call below */
+       assert_ring_tail_valid(request->ring, request->tail);
+
+       return gen8_emit_wa_tail(request, cs); /* callers receive whatever pointer gen8_emit_wa_tail() returns */
+}
+
+static u32 *gen12_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
+{ /* Default gen12 breadcrumb; logical_ring_default_vfuncs() installs it when INTEL_GEN(engine->i915) >= 12. */
+       cs = gen8_emit_ggtt_write(cs,
+                                 request->fence.seqno, /* value handed to the write helper */
+                                 request->timeline->hwsp_offset, /* offset handed to the write helper as its target */
+                                 0); /* last argument is 0 here; the _rcs emitters pass PIPE_CONTROL_* bits to gen8_emit_ggtt_write_rcs() instead */
+
+       return gen12_emit_fini_breadcrumb_footer(request, cs); /* footer appends the interrupt/arbitration dwords and sets request->tail */
+}
+
+static u32 *
+gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
  {
         cs = gen8_emit_ggtt_write_rcs(cs,
                                       request->fence.seqno,
@@ -2910,7 +2967,7 @@ static u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *request,
                                       PIPE_CONTROL_DC_FLUSH_ENABLE |
                                       PIPE_CONTROL_FLUSH_ENABLE);
  
-       return gen8_emit_fini_breadcrumb_footer(request, cs);
+       return gen12_emit_fini_breadcrumb_footer(request, cs);
  }
  
  static void execlists_park(struct intel_engine_cs *engine)
@@ -2939,9 +2996,6 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
                         engine->flags |= I915_ENGINE_HAS_PREEMPTION;
         }
  
-       if (INTEL_GEN(engine->i915) >= 12) /* XXX disabled for debugging */
-               engine->flags &= ~I915_ENGINE_HAS_SEMAPHORES;
-
         if (engine->class != COPY_ENGINE_CLASS && INTEL_GEN(engine->i915) >= 12)
                 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO;
  }
@@ -2971,6 +3025,8 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
         engine->emit_flush = gen8_emit_flush;
         engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb;
         engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb;
+       if (INTEL_GEN(engine->i915) >= 12)
+               engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb;
  
         engine->set_default_submission = intel_execlists_set_default_submission;
  
@@ -3016,6 +3072,9 @@ static void rcs_submission_override(struct intel_engine_cs *engine)
  {
         switch (INTEL_GEN(engine->i915)) {
         case 12:
+               engine->emit_flush = gen11_emit_flush_render;
+               engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs;
+               break;
         case 11:
                 engine->emit_flush = gen11_emit_flush_render;
                 engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs;
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c

index e4a26bbd87882f79c29c1fe131e06632a6388f33..fe6941c8fc99db9d81c3efcce69e23ff533925d0 100644 (file)
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -797,7 +797,6 @@ static const struct intel_device_info intel_tigerlake_12_info = {
         .display.has_modular_fia = 1,
         .engine_mask =
                 BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) | BIT(VCS2),
-       .has_logical_ring_preemption = false, /* XXX disabled for debugging */
         .engine_mask = BIT(RCS0), /* XXX reduced for debugging */
  };
  
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 754a78364a633bf696fd1ce25985469557472c93..3ecf92aa5fc123739f9b5de5a264bf89477be62a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -783,7 +783,9 @@ emit_semaphore_wait(struct i915_request *to,
                     struct i915_request *from,
                     gfp_t gfp)
  {
+       const int has_token = INTEL_GEN(to->i915) >= 12;
         u32 hwsp_offset;
+       int len;
         u32 *cs;
         int err;
  
@@ -810,7 +812,11 @@ emit_semaphore_wait(struct i915_request *to,
         if (err)
                 return err;
  
-       cs = intel_ring_begin(to, 4);
+       len = 4;
+       if (has_token)
+               len += 2;
+
+       cs = intel_ring_begin(to, len);
         if (IS_ERR(cs))
                 return PTR_ERR(cs);
  
@@ -822,13 +828,18 @@ emit_semaphore_wait(struct i915_request *to,
          * (post-wrap) values than they were expecting (and so wait
          * forever).
          */
-       *cs++ = MI_SEMAPHORE_WAIT |
-               MI_SEMAPHORE_GLOBAL_GTT |
-               MI_SEMAPHORE_POLL |
-               MI_SEMAPHORE_SAD_GTE_SDD;
+       *cs++ = (MI_SEMAPHORE_WAIT |
+                MI_SEMAPHORE_GLOBAL_GTT |
+                MI_SEMAPHORE_POLL |
+                MI_SEMAPHORE_SAD_GTE_SDD) +
+               has_token;
         *cs++ = from->fence.seqno;
         *cs++ = hwsp_offset;
         *cs++ = 0;
+       if (has_token) {
+               *cs++ = 0;
+               *cs++ = MI_NOOP;
+       }
  
         intel_ring_advance(to, cs);
         to->sched.semaphores |= from->engine->mask;
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 17 Sep 2019 12:30:55 +0000 (13:30 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 17 Sep 2019 14:33:21 +0000 (15:33 +0100)
drivers/gpu/drm/i915/gt/intel_gpu_commands.h		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_lrc.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_pci.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_request.c		patch \| blob \| history