drm/i915: Flush TLBs before releasing backing store
Author:     Tvrtko Ursulin <tvrtko.ursulin@intel.com>
AuthorDate: Tue, 19 Oct 2021 12:27:10 +0000 (13:27 +0100)
Commit:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CommitDate: Sat, 29 Jan 2022 09:19:18 +0000 (10:19 +0100)
commit 7938d61591d33394a21bdd7797a245b65428f44c upstream.

We need to flush TLBs before releasing backing store, otherwise
userspace can encounter stale entries if a) it does not declare access
to certain buffers and b) it races with the backing store release from
such an undeclared execution already running on the GPU in parallel.

The approach taken is to mark any buffer objects which were ever bound
to the GPU and to trigger a serialized TLB flush when their backing
store is released.
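
As a rough sketch (simplified, with context collapsed, and using only
the flag, lock and helpers this patch introduces; the actual change is
in the diff below), the scheme amounts to:

    /* On GPU bind: remember that the object has ever been mapped. */
    if (vma->obj)
            set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);

    /*
     * On backing store release: flush the TLBs once, serialized on a
     * dedicated mutex, and only if the device is awake anyway.
     */
    if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
            struct drm_i915_private *i915 = to_i915(obj->base.dev);

            if (intel_runtime_pm_get_if_in_use(i915)) {
                    invalidate_tlbs(i915); /* takes tlb_invalidate_lock */
                    intel_runtime_pm_put(i915);
            }
    }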

Alternatively the flushing could be done on VMA unbind, at which point
we would be able to ascertain whether there is potentially a parallel
GPU execution (which could race), but essentially it boils down to
paying the cost of TLB flushes potentially needlessly at VMA unbind
time (when the backing store is not known to be going away, so the
flush is not needed for safety), versus potentially needlessly at
backing store release time (since at that point we cannot tell whether
anything executing on the GPU still uses that object).

Therefore simplicity of implementation has been chosen for now, with
scope to benchmark and refine later as required.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reported-by: Sushma Venkatesh Reddy <sushma.venkatesh.reddy@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Dave Airlie <airlied@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jon Bloomfield <jon.bloomfield@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_object.h
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/i915_vma.c

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 37c80cfecd09791d31a8b3d7786c20117f2e999f..c25ee6a02d65e45f1cb122da5dea62a3e65a9daa 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1595,6 +1595,8 @@ struct drm_i915_private {
 
        struct intel_uncore uncore;
 
+       struct mutex tlb_invalidate_lock;
+
        struct i915_virtual_gpu vgpu;
 
        struct intel_gvt *gvt;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c7d05ac7af3cb1a5e7c2164c444cca089283705f..5b0d6d8b3ab8ed34b6fd61a649f9fb94ff8c515e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2446,6 +2446,78 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
        rcu_read_unlock();
 }
 
+struct reg_and_bit {
+       i915_reg_t reg;
+       u32 bit;
+};
+
+static struct reg_and_bit
+get_reg_and_bit(const struct intel_engine_cs *engine,
+               const i915_reg_t *regs, const unsigned int num)
+{
+       const unsigned int class = engine->class;
+       struct reg_and_bit rb = { .bit = 1 };
+
+       if (WARN_ON_ONCE(class >= num || !regs[class].reg))
+               return rb;
+
+       rb.reg = regs[class];
+       if (class == VIDEO_DECODE_CLASS)
+               rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */
+
+       return rb;
+}
+
+static void invalidate_tlbs(struct drm_i915_private *dev_priv)
+{
+       static const i915_reg_t gen8_regs[] = {
+               [RENDER_CLASS]                  = GEN8_RTCR,
+               [VIDEO_DECODE_CLASS]            = GEN8_M1TCR, /* , GEN8_M2TCR */
+               [VIDEO_ENHANCEMENT_CLASS]       = GEN8_VTCR,
+               [COPY_ENGINE_CLASS]             = GEN8_BTCR,
+       };
+       const unsigned int num = ARRAY_SIZE(gen8_regs);
+       const i915_reg_t *regs = gen8_regs;
+       struct intel_engine_cs *engine;
+       enum intel_engine_id id;
+
+       if (INTEL_GEN(dev_priv) < 8)
+               return;
+
+       GEM_TRACE("\n");
+
+       assert_rpm_wakelock_held(dev_priv);
+
+       mutex_lock(&dev_priv->tlb_invalidate_lock);
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
+       for_each_engine(engine, dev_priv, id) {
+               /*
+                * HW architecture suggests a typical invalidation time of 40us,
+                * with pessimistic cases up to 100us and a recommendation to
+                * cap at 1ms. We go a bit higher just in case.
+                */
+               const unsigned int timeout_us = 100;
+               const unsigned int timeout_ms = 4;
+               struct reg_and_bit rb;
+
+               rb = get_reg_and_bit(engine, regs, num);
+               if (!i915_mmio_reg_offset(rb.reg))
+                       continue;
+
+               I915_WRITE_FW(rb.reg, rb.bit);
+               if (__intel_wait_for_register_fw(dev_priv,
+                                                rb.reg, rb.bit, 0,
+                                                timeout_us, timeout_ms,
+                                                NULL))
+                       DRM_ERROR_RATELIMITED("%s TLB invalidation did not complete in %ums!\n",
+                                             engine->name, timeout_ms);
+       }
+
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+       mutex_unlock(&dev_priv->tlb_invalidate_lock);
+}
+
 static struct sg_table *
 __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
 {
@@ -2475,6 +2547,15 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
        __i915_gem_object_reset_page_iter(obj);
        obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
 
+       if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) {
+               struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+               if (intel_runtime_pm_get_if_in_use(i915)) {
+                       invalidate_tlbs(i915);
+                       intel_runtime_pm_put(i915);
+               }
+       }
+
        return pages;
 }
 
@@ -5792,6 +5873,8 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv)
 
        spin_lock_init(&dev_priv->fb_tracking.lock);
 
+       mutex_init(&dev_priv->tlb_invalidate_lock);
+
        err = i915_gemfs_init(dev_priv);
        if (err)
                DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 83e5e01fa9eaa9c8586445329089959290f2f32d..2e3a713e9bcd8bcdd385402fc2c9a4ef65247729 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -136,6 +136,7 @@ struct drm_i915_gem_object {
         * activity?
         */
 #define I915_BO_ACTIVE_REF 0
+#define I915_BO_WAS_BOUND_BIT    1
 
        /*
         * Is the object to be mapped as read-only to the GPU
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index a6f4f32dd71ce483fa583ed75d1ec1bbdf9975c6..830049985e56dfa475c236a47ee0f7baf0e1bbcd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2431,6 +2431,12 @@ enum i915_power_well_id {
 #define   GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING     (1 << 28)
 #define   GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT       (1 << 24)
 
+#define GEN8_RTCR      _MMIO(0x4260)
+#define GEN8_M1TCR     _MMIO(0x4264)
+#define GEN8_M2TCR     _MMIO(0x4268)
+#define GEN8_BTCR      _MMIO(0x426c)
+#define GEN8_VTCR      _MMIO(0x4270)
+
 #if 0
 #define PRB0_TAIL      _MMIO(0x2030)
 #define PRB0_HEAD      _MMIO(0x2034)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 98358b4b36dea7e13177bdf38554ffaad4f994e9..9aceacc43f4b749f5eb0f22786cbd9b4483e0a0f 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -335,6 +335,10 @@ int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
                return ret;
 
        vma->flags |= bind_flags;
+
+       if (vma->obj)
+               set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags);
+
        return 0;
 }