#define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT             (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8     MI_INSTR(0x31, 1)
-#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
+#define   MI_BATCH_RESOURCE_STREAMER REG_BIT(10)
+#define   MI_BATCH_PREDICATE         REG_BIT(15) /* HSW+ on RCS only */
 
 /*
  * 3D instructions used by the kernel
 #define   PIPE_CONTROL_CS_STALL                                (1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE                  (1<<18)
 #define   PIPE_CONTROL_MEDIA_STATE_CLEAR               (1<<16)
+#define   PIPE_CONTROL_WRITE_TIMESTAMP                 (3<<14)
 #define   PIPE_CONTROL_QW_WRITE                                (1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL                     (1<<13)
 
        /* 8 bytes */
        INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
 
+       /* 6 * 8 bytes */
+       INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
+
+       /* 4 bytes */
+       INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
 
                        i915_wedged_get, i915_wedged_set,
                        "%llu\n");
 
+static int
+i915_perf_noa_delay_set(void *data, u64 val)
+{
+       struct drm_i915_private *i915 = data;
+       const u32 clk = RUNTIME_INFO(i915)->cs_timestamp_frequency_khz;
+
+       /*
+        * Anything above this limit would lead to infinite waits, as the
+        * timestamp difference is computed on the CS with only 32 bits.
+        */
+       if (val > div_u64(mul_u32_u32(U32_MAX, 1000 * 1000), clk))
+               return -EINVAL;
+
+       atomic64_set(&i915->perf.noa_programming_delay, val);
+       return 0;
+}
+
+static int
+i915_perf_noa_delay_get(void *data, u64 *val)
+{
+       struct drm_i915_private *i915 = data;
+
+       *val = atomic64_read(&i915->perf.noa_programming_delay);
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_perf_noa_delay_fops,
+                       i915_perf_noa_delay_get,
+                       i915_perf_noa_delay_set,
+                       "%llu\n");
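+/*
+ * Value in ns; usage sketch (DRI minor number assumed):
+ *   echo 500000 > /sys/kernel/debug/dri/0/i915_perf_noa_delay
+ */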
+
 #define DROP_UNBOUND   BIT(0)
 #define DROP_BOUND     BIT(1)
 #define DROP_RETIRE    BIT(2)
        const char *name;
        const struct file_operations *fops;
 } i915_debugfs_files[] = {
+       {"i915_perf_noa_delay", &i915_perf_noa_delay_fops},
        {"i915_wedged", &i915_wedged_fops},
        {"i915_cache_sharing", &i915_cache_sharing_fops},
        {"i915_gem_drop_caches", &i915_drop_caches_fops},
 
 #include "gem/i915_gem_context.h"
 #include "gt/intel_engine_pm.h"
 #include "gt/intel_engine_user.h"
+#include "gt/intel_gt.h"
 #include "gt/intel_lrc_reg.h"
 
 #include "i915_drv.h"
                free_oa_config_bo(oa_bo);
 }
 
+static void
+free_noa_wait(struct i915_perf_stream *stream)
+{
+       i915_vma_unpin_and_release(&stream->noa_wait, 0);
+}
+
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
        struct i915_perf *perf = stream->perf;
                oa_put_render_ctx_id(stream);
 
        free_oa_configs(stream);
+       free_noa_wait(stream);
 
        if (perf->spurious_report_rs.missed) {
                DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
        return ret;
 }
 
+static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
+                                 bool save, i915_reg_t reg, u32 offset,
+                                 u32 dword_count)
+{
+       u32 cmd;
+       u32 d;
+
+       cmd = save ? MI_STORE_REGISTER_MEM : MI_LOAD_REGISTER_MEM;
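+       /*
+        * The Gen8+ versions of SRM/LRM take a 64bit address and are one
+        * dword longer; incrementing the opcode dword also bumps the
+        * length field.
+        */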
+       if (INTEL_GEN(stream->perf->i915) >= 8)
+               cmd++;
+
+       for (d = 0; d < dword_count; d++) {
+               *cs++ = cmd;
+               *cs++ = i915_mmio_reg_offset(reg) + 4 * d;
+               *cs++ = intel_gt_scratch_offset(stream->engine->gt,
+                                               offset) + 4 * d;
+               *cs++ = 0;
+       }
+
+       return cs;
+}
+
+static int alloc_noa_wait(struct i915_perf_stream *stream)
+{
+       struct drm_i915_private *i915 = stream->perf->i915;
+       struct drm_i915_gem_object *bo;
+       struct i915_vma *vma;
+       const u64 delay_ticks = 0xffffffffffffffff -
+               DIV64_U64_ROUND_UP(
+                       atomic64_read(&stream->perf->noa_programming_delay) *
+                       RUNTIME_INFO(i915)->cs_timestamp_frequency_khz,
+                       1000000ull);
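+       /*
+        * The delay is programmed as ((2^64 - 1) - <delay in ticks>) so
+        * that adding the measured timestamp delta to it in the CS ALU
+        * overflows (sets the carry flag) exactly once the delay has
+        * elapsed.
+        */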
+       const u32 base = stream->engine->mmio_base;
+#define CS_GPR(x) GEN8_RING_CS_GPR(base, x)
+       u32 *batch, *ts0, *cs, *jump;
+       int ret, i;
+       enum {
+               START_TS,
+               NOW_TS,
+               DELTA_TS,
+               JUMP_PREDICATE,
+               DELTA_TARGET,
+               N_CS_GPR
+       };
+
+       bo = i915_gem_object_create_internal(i915, 4096);
+       if (IS_ERR(bo)) {
+               DRM_ERROR("Failed to allocate NOA wait batchbuffer\n");
+               return PTR_ERR(bo);
+       }
+
+       /*
+        * We pin the buffer in GGTT because multiple OA config BOs will
+        * jump into it, so its address needs to stay fixed for the
+        * lifetime of the i915/perf stream.
+        */
+       vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 0, PIN_HIGH);
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               goto err_unref;
+       }
+
+       batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
+       if (IS_ERR(batch)) {
+               ret = PTR_ERR(batch);
+               goto err_unpin;
+       }
+
+       /* Save registers. */
+       for (i = 0; i < N_CS_GPR; i++)
+               cs = save_restore_register(
+                       stream, cs, true /* save */, CS_GPR(i),
+                       INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+       cs = save_restore_register(
+               stream, cs, true /* save */, MI_PREDICATE_RESULT_1,
+               INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+       /* First timestamp snapshot location. */
+       ts0 = cs;
+
+       /*
+        * Initial snapshot of the timestamp register to implement the wait.
+        * We work with 32bit values, so clear out the top 32 bits of the
+        * register because the ALU works on 64 bits.
+        */
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS)) + 4;
+       *cs++ = 0;
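+       /* MI_LOAD_REGISTER_REG is 3 dwords; (3 - 2) encodes its length field. */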
+       *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+       *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+       *cs++ = i915_mmio_reg_offset(CS_GPR(START_TS));
+
+       /*
+        * This is the location we're going to jump back into until the
+        * required amount of time has passed.
+        */
+       jump = cs;
+
+       /*
+        * Take another snapshot of the timestamp register. Take care to
+        * clear the top 32 bits of CS_GPR(NOW_TS) as we're using it for
+        * other operations below.
+        */
+       *cs++ = MI_LOAD_REGISTER_IMM(1);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS)) + 4;
+       *cs++ = 0;
+       *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+       *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(base));
+       *cs++ = i915_mmio_reg_offset(CS_GPR(NOW_TS));
+
+       /*
+        * Do a diff between the 2 timestamps and store the result back into
+        * CS_GPR(1).
+        */
+       *cs++ = MI_MATH(5);
+       *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(NOW_TS));
+       *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(START_TS));
+       *cs++ = MI_MATH_SUB;
+       *cs++ = MI_MATH_STORE(MI_MATH_REG(DELTA_TS), MI_MATH_REG_ACCU);
+       *cs++ = MI_MATH_STORE(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+       /*
+        * Transfer the carry flag (set to 1 if ts1 < ts0, meaning the
+        * timestamp has rolled over the 32 bits) into the predicate register
+        * to be used for the predicated jump.
+        */
+       *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+       *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+       /* Restart from the beginning if we had timestamps roll over. */
+       *cs++ = (INTEL_GEN(i915) < 8 ?
+                MI_BATCH_BUFFER_START :
+                MI_BATCH_BUFFER_START_GEN8) |
+               MI_BATCH_PREDICATE;
+       *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
+       *cs++ = 0;
+
+       /*
+        * Now add the difference between the two previous timestamps to
+        * delay_ticks, i.e.:
+        *      ((1 << 64) - 1) - <delay in timestamp ticks>
+        *
+        * When the addition sets the carry flag, the elapsed time is
+        * longer than the expected delay and we can exit the wait loop.
+        */
+       *cs++ = MI_LOAD_REGISTER_IMM(2);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET));
+       *cs++ = lower_32_bits(delay_ticks);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(DELTA_TARGET)) + 4;
+       *cs++ = upper_32_bits(delay_ticks);
+
+       *cs++ = MI_MATH(4);
+       *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(DELTA_TS));
+       *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(DELTA_TARGET));
+       *cs++ = MI_MATH_ADD;
+       *cs++ = MI_MATH_STOREINV(MI_MATH_REG(JUMP_PREDICATE), MI_MATH_REG_CF);
+
+       /*
+        * Transfer the result into the predicate register to be used for the
+        * predicated jump.
+        */
+       *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
+       *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
+       *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1);
+
+       /* Predicate the jump. */
+       *cs++ = (INTEL_GEN(i915) < 8 ?
+                MI_BATCH_BUFFER_START :
+                MI_BATCH_BUFFER_START_GEN8) |
+               MI_BATCH_PREDICATE;
+       *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
+       *cs++ = 0;
+
+       /* Restore registers. */
+       for (i = 0; i < N_CS_GPR; i++)
+               cs = save_restore_register(
+                       stream, cs, false /* restore */, CS_GPR(i),
+                       INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+       cs = save_restore_register(
+               stream, cs, false /* restore */, MI_PREDICATE_RESULT_1,
+               INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+
+       /* And return to the ring. */
+       *cs++ = MI_BATCH_BUFFER_END;
+
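+       /* The generated batch must fit within the single page allocated above. */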
+       GEM_BUG_ON(cs - batch > PAGE_SIZE / sizeof(*batch));
+
+       i915_gem_object_flush_map(bo);
+       i915_gem_object_unpin_map(bo);
+
+       stream->noa_wait = vma;
+       return 0;
+
+err_unpin:
+       __i915_vma_unpin(vma);
+err_unref:
+       i915_gem_object_put(bo);
+       return ret;
+}
+
 static void config_oa_regs(struct intel_uncore *uncore,
                           const struct i915_oa_reg *regs,
                           u32 n_regs)
                }
        }
 
+       ret = alloc_noa_wait(stream);
+       if (ret) {
+               DRM_DEBUG("Unable to allocate NOA wait batch buffer\n");
+               goto err_noa_wait_alloc;
+       }
+
        stream->oa_config = i915_perf_get_oa_config(perf, props->metrics_set);
        if (!stream->oa_config) {
                DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set);
        intel_engine_pm_put(stream->engine);
 
 err_config:
+       free_noa_wait(stream);
+
+err_noa_wait_alloc:
        if (stream->ctx)
                oa_put_render_ctx_id(stream);
 
                ratelimit_set_flags(&perf->spurious_report_rs,
                                    RATELIMIT_MSG_ON_RELEASE);
 
+               atomic64_set(&perf->noa_programming_delay,
+                            500 * 1000 /* 500us */);
+
                perf->i915 = i915;
        }
 }
        memset(&perf->ops, 0, sizeof(perf->ops));
        perf->i915 = NULL;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_perf.c"
+#endif
 
                 */
                u32 head;
        } oa_buffer;
+
+       /**
+        * @noa_wait: A batch buffer doing a wait on the GPU for the NOA
+        * logic to be reprogrammed.
+        */
+       struct i915_vma *noa_wait;
 };
 
 /**
 
        struct i915_oa_ops ops;
        const struct i915_oa_format *oa_formats;
+
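+       /* NOA programming delay in ns, adjustable through debugfs. */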
+       atomic64_t noa_programming_delay;
 };
 
 #endif /* _I915_PERF_TYPES_H_ */
 
 #define MI_PREDICATE_SRC0_UDW  _MMIO(0x2400 + 4)
 #define MI_PREDICATE_SRC1      _MMIO(0x2408)
 #define MI_PREDICATE_SRC1_UDW  _MMIO(0x2408 + 4)
-
+#define MI_PREDICATE_DATA       _MMIO(0x2410)
+#define MI_PREDICATE_RESULT     _MMIO(0x2418)
+#define MI_PREDICATE_RESULT_1   _MMIO(0x241c)
 #define MI_PREDICATE_RESULT_2  _MMIO(0x2214)
 #define  LOWER_SLICE_ENABLED   (1 << 0)
 #define  LOWER_SLICE_DISABLED  (0 << 0)
 
 selftest(hangcheck, intel_hangcheck_live_selftests)
 selftest(execlists, intel_execlists_live_selftests)
 selftest(guc, intel_guc_live_selftest)
+selftest(perf, i915_perf_live_selftests)
 
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <linux/kref.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_gt.h"
+
+#include "i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+static struct i915_perf_stream *
+test_stream(struct i915_perf *perf)
+{
+       struct drm_i915_perf_open_param param = {};
+       struct perf_open_properties props = {
+               .engine = intel_engine_lookup_user(perf->i915,
+                                                  I915_ENGINE_CLASS_RENDER,
+                                                  0),
+               .sample_flags = SAMPLE_OA_REPORT,
+               .oa_format = I915_OA_FORMAT_C4_B8,
+               .metrics_set = 1,
+       };
+       struct i915_perf_stream *stream;
+
+       stream = kzalloc(sizeof(*stream), GFP_KERNEL);
+       if (!stream)
+               return NULL;
+
+       stream->perf = perf;
+
+       mutex_lock(&perf->lock);
+       if (i915_oa_stream_init(stream, &param, &props)) {
+               kfree(stream);
+               stream = NULL;
+       }
+       mutex_unlock(&perf->lock);
+
+       return stream;
+}
+
+static void stream_destroy(struct i915_perf_stream *stream)
+{
+       struct i915_perf *perf = stream->perf;
+
+       mutex_lock(&perf->lock);
+       i915_perf_destroy_locked(stream);
+       mutex_unlock(&perf->lock);
+}
+
+static int live_sanitycheck(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct i915_perf_stream *stream;
+
+       /* Quick check we can create a perf stream */
+
+       stream = test_stream(&i915->perf);
+       if (!stream)
+               return -EINVAL;
+
+       stream_destroy(stream);
+       return 0;
+}
+
+static int write_timestamp(struct i915_request *rq, int slot)
+{
+       u32 *cs;
+       int len;
+
+       cs = intel_ring_begin(rq, 6);
+       if (IS_ERR(cs))
+               return PTR_ERR(cs);
+
+       len = 5;
+       if (INTEL_GEN(rq->i915) >= 8)
+               len++;
+
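+       /*
+        * Emit a PIPE_CONTROL with a timestamp post-sync op.
+        * STORE_DATA_INDEX makes the write address an offset into the
+        * engine's status page; Gen8+ takes one extra dword for the
+        * 64bit address, hence the longer length.
+        */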
+       *cs++ = GFX_OP_PIPE_CONTROL(len);
+       *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB |
+               PIPE_CONTROL_STORE_DATA_INDEX |
+               PIPE_CONTROL_WRITE_TIMESTAMP;
+       *cs++ = slot * sizeof(u32);
+       *cs++ = 0;
+       *cs++ = 0;
+       *cs++ = 0;
+
+       intel_ring_advance(rq, cs);
+
+       return 0;
+}
+
+static ktime_t poll_status(struct i915_request *rq, int slot)
+{
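+       /*
+        * Busy-spin until the GPU writes the slot (or the request
+        * completes), then immediately take a CPU timestamp.
+        */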
+       while (!intel_read_status_page(rq->engine, slot) &&
+              !i915_request_completed(rq))
+               cpu_relax();
+
+       return ktime_get();
+}
+
+static int live_noa_delay(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct i915_perf_stream *stream;
+       struct i915_request *rq;
+       ktime_t t0, t1;
+       u64 expected;
+       u32 delay;
+       int err;
+       int i;
+
+       /* Check that the GPU delay matches expectations */
+
+       stream = test_stream(&i915->perf);
+       if (!stream)
+               return -ENOMEM;
+
+       expected = atomic64_read(&stream->perf->noa_programming_delay);
+
+       if (stream->engine->class != RENDER_CLASS) {
+               err = -ENODEV;
+               goto out;
+       }
+
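+       /* Clear the four HWSP dwords that will hold the two 64bit timestamps. */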
+       for (i = 0; i < 4; i++)
+               intel_write_status_page(stream->engine, 0x100 + i, 0);
+
+       rq = i915_request_create(stream->engine->kernel_context);
+       if (IS_ERR(rq)) {
+               err = PTR_ERR(rq);
+               goto out;
+       }
+
+       if (rq->engine->emit_init_breadcrumb &&
+           i915_request_timeline(rq)->has_initial_breadcrumb) {
+               err = rq->engine->emit_init_breadcrumb(rq);
+               if (err) {
+                       i915_request_add(rq);
+                       goto out;
+               }
+       }
+
+       err = write_timestamp(rq, 0x100);
+       if (err) {
+               i915_request_add(rq);
+               goto out;
+       }
+
+       err = rq->engine->emit_bb_start(rq,
+                                       i915_ggtt_offset(stream->noa_wait), 0,
+                                       I915_DISPATCH_SECURE);
+       if (err) {
+               i915_request_add(rq);
+               goto out;
+       }
+
+       err = write_timestamp(rq, 0x102);
+       if (err) {
+               i915_request_add(rq);
+               goto out;
+       }
+
+       i915_request_get(rq);
+       i915_request_add(rq);
+
+       preempt_disable();
+       t0 = poll_status(rq, 0x100);
+       t1 = poll_status(rq, 0x102);
+       preempt_enable();
+
+       pr_info("CPU delay: %lluns, expected %lluns\n",
+               ktime_sub(t1, t0), expected);
+
+       delay = intel_read_status_page(stream->engine, 0x102);
+       delay -= intel_read_status_page(stream->engine, 0x100);
+       delay = div_u64(mul_u32_u32(delay, 1000 * 1000),
+                       RUNTIME_INFO(i915)->cs_timestamp_frequency_khz);
+       pr_info("GPU delay: %uns, expected %lluns\n",
+               delay, expected);
+
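+       /* Fail if the measured delay is outside [75%, 150%] of expected. */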
+       if (4 * delay < 3 * expected || 2 * delay > 3 * expected) {
+               pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n",
+                      delay / 1000,
+                      div_u64(3 * expected, 4000),
+                      div_u64(3 * expected, 2000));
+               err = -EINVAL;
+       }
+
+       i915_request_put(rq);
+out:
+       stream_destroy(stream);
+       return err;
+}
+
+int i915_perf_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(live_sanitycheck),
+               SUBTEST(live_noa_delay),
+       };
+       struct i915_perf *perf = &i915->perf;
+
+       if (!perf->metrics_kobj || !perf->ops.enable_metric_set)
+               return 0;
+
+       if (intel_gt_is_wedged(&i915->gt))
+               return 0;
+
+       return i915_subtests(tests, i915);
+}