drm/i915/perf: fix ctx_id read with GuC & ICL

author Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Sat, 2 Jun 2018 11:29:46 +0000 (12:29 +0100)

committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>

Mon, 4 Jun 2018 17:16:08 +0000 (18:16 +0100)
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Sat, 2 Jun 2018 11:29:46 +0000 (12:29 +0100)
committer Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Mon, 4 Jun 2018 17:16:08 +0000 (18:16 +0100)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index b5b09c8a327f7704884a01e28689d9b3de0ec8ad..06ecac4c32534e0a573e68095fa1cc9289836ecf 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1951,6 +1951,7 @@ struct drm_i915_private {
  
                         struct intel_context *pinned_ctx;
                         u32 specific_ctx_id;
+                       u32 specific_ctx_id_mask;
  
                         struct hrtimer poll_check_timer;
                         wait_queue_head_t poll_wq;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c

index 4f0eb84b3c00de4b3ebf876e533a080f04962500..a6c8d61add0cd58f2e259422f185b4cf6497c2bc 100644 (file)
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -737,12 +737,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                         continue;
                 }
  
-               /*
-                * XXX: Just keep the lower 21 bits for now since I'm not
-                * entirely sure if the HW touches any of the higher bits in
-                * this field
-                */
-               ctx_id = report32[2] & 0x1fffff;
+               ctx_id = report32[2] & dev_priv->perf.oa.specific_ctx_id_mask;
  
                 /*
                  * Squash whatever is in the CTX_ID field if it's marked as
@@ -1203,6 +1198,33 @@ static int i915_oa_read(struct i915_perf_stream *stream,
         return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
  }
  
+static struct intel_context *oa_pin_context(struct drm_i915_private *i915,
+                                           struct i915_gem_context *ctx)
+{
+       struct intel_engine_cs *engine = i915->engine[RCS];
+       struct intel_context *ce;
+       int ret;
+
+       ret = i915_mutex_lock_interruptible(&i915->drm);
+       if (ret)
+               return ERR_PTR(ret);
+
+       /*
+        * Pin the context on the render engine (RCS is implied).
+        *
+        * Where the ID is the gtt offset of the context's vma, the
+        * pin is what ensures that the ID remains fixed.
+        */
+       ce = intel_context_pin(ctx, engine);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (IS_ERR(ce))
+               return ce;
+
+       i915->perf.oa.pinned_ctx = ce;
+
+       return ce;
+}
+
  /**
   * oa_get_render_ctx_id - determine and hold ctx hw id
   * @stream: An i915-perf stream opened for OA metrics
@@ -1215,40 +1237,76 @@ static int i915_oa_read(struct i915_perf_stream *stream,
   */
  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
  {
-       struct drm_i915_private *dev_priv = stream->dev_priv;
-
-       if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
-               dev_priv->perf.oa.specific_ctx_id = stream->ctx->hw_id;
-       } else {
-               struct intel_engine_cs *engine = dev_priv->engine[RCS];
-               struct intel_context *ce;
-               int ret;
+       struct drm_i915_private *i915 = stream->dev_priv;
+       struct intel_context *ce;
  
-               ret = i915_mutex_lock_interruptible(&dev_priv->drm);
-               if (ret)
-                       return ret;
+       ce = oa_pin_context(i915, stream->ctx);
+       if (IS_ERR(ce))
+               return PTR_ERR(ce);
  
+       switch (INTEL_GEN(i915)) {
+       case 7: {
                 /*
-                * As the ID is the gtt offset of the context's vma we
-                * pin the vma to ensure the ID remains fixed.
-                *
-                * NB: implied RCS engine...
+                * On Haswell we don't do any post processing of the reports
+                * and don't need to use the mask.
                  */
-               ce = intel_context_pin(stream->ctx, engine);
-               mutex_unlock(&dev_priv->drm.struct_mutex);
-               if (IS_ERR(ce))
-                       return PTR_ERR(ce);
+               i915->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
+               i915->perf.oa.specific_ctx_id_mask = 0;
+               break;
+       }
  
-               dev_priv->perf.oa.pinned_ctx = ce;
+       case 8:
+       case 9:
+       case 10:
+               if (USES_GUC_SUBMISSION(i915)) {
+                       /*
+                        * When using GuC, the context descriptor we write in
+                        * i915 is read by GuC and rewritten before it's
+                        * actually written into the hardware. The LRCA is
+                        * what is put into the context id field of the
+                        * context descriptor by GuC. Because it's aligned to
+                        * a page, the lower 12bits are always at 0 and
+                        * dropped by GuC. They won't be part of the context
+                        * ID in the OA reports, so squash those lower bits.
+                        */
+                       i915->perf.oa.specific_ctx_id =
+                               lower_32_bits(ce->lrc_desc) >> 12;
  
-               /*
-                * Explicitly track the ID (instead of calling
-                * i915_ggtt_offset() on the fly) considering the difference
-                * with gen8+ and execlists
-                */
-               dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(ce->state);
+                       /*
+                        * GuC uses the top bit to signal proxy submission, so
+                        * ignore that bit.
+                        */
+                       i915->perf.oa.specific_ctx_id_mask =
+                               (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
+               } else {
+                       i915->perf.oa.specific_ctx_id = stream->ctx->hw_id;
+                       i915->perf.oa.specific_ctx_id_mask =
+                               (1U << GEN8_CTX_ID_WIDTH) - 1;
+               }
+               break;
+
+       case 11: {
+               struct intel_engine_cs *engine = i915->engine[RCS];
+
+               i915->perf.oa.specific_ctx_id =
+                       stream->ctx->hw_id << (GEN11_SW_CTX_ID_SHIFT - 32) |
+                       engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+                       engine->class << (GEN11_ENGINE_INSTANCE_SHIFT - 32);
+               i915->perf.oa.specific_ctx_id_mask =
+                       ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) |
+                       ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) |
+                       ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32);
+               break;
+       }
+
+       default:
+               MISSING_CASE(INTEL_GEN(i915));
         }
  
+       DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
+                        i915->perf.oa.specific_ctx_id,
+                        i915->perf.oa.specific_ctx_id_mask);
+
         return 0;
  }
  
@@ -1265,6 +1323,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
         struct intel_context *ce;
  
         dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;
+       dev_priv->perf.oa.specific_ctx_id_mask = 0;
  
         ce = fetch_and_zero(&dev_priv->perf.oa.pinned_ctx);
         if (ce) {
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index cbc2a8d4dc9cf906f468e729fff4a22882513528..f6f09f808f74ae0d6c34a4a0ecefc6704c21af87 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -233,6 +233,11 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx,
                                                                 /* bits 12-31 */
         GEM_BUG_ON(desc & GENMASK_ULL(63, 32));
  
+       /*
+        * The following 32bits are copied into the OA reports (dword 2).
+        * Consider updating oa_get_render_ctx_id in i915_perf.c when changing
+        * anything below.
+        */
         if (INTEL_GEN(ctx->i915) >= 11) {
                 GEM_BUG_ON(ctx->hw_id >= BIT(GEN11_SW_CTX_ID_WIDTH));
                 desc |= (u64)ctx->hw_id << GEN11_SW_CTX_ID_SHIFT;
author	Lionel Landwerlin <lionel.g.landwerlin@intel.com>
	Sat, 2 Jun 2018 11:29:46 +0000 (12:29 +0100)
committer	Lionel Landwerlin <lionel.g.landwerlin@intel.com>
	Mon, 4 Jun 2018 17:16:08 +0000 (18:16 +0100)
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_perf.c		patch \| blob \| history
drivers/gpu/drm/i915/intel_lrc.c		patch \| blob \| history