engine->name,
                                           i915_gem_request_get_seqno(work->flip_queued_req),
                                           dev_priv->next_seqno,
-                                          engine->get_seqno(engine, true),
+                                          engine->get_seqno(engine),
                                           i915_gem_request_completed(work->flip_queued_req, true));
                        } else
                                seq_printf(m, "Flip not associated with any ring\n");

 {
        if (engine->get_seqno) {
                seq_printf(m, "Current sequence (%s): %x\n",
-                          engine->name, engine->get_seqno(engine, false));
+                          engine->name, engine->get_seqno(engine));
        }
 }
 
        intel_runtime_pm_get(dev_priv);
 
        for_each_engine_id(engine, dev_priv, id) {
-               seqno[id] = engine->get_seqno(engine, false);
                acthd[id] = intel_ring_get_active_head(engine);
+               seqno[id] = engine->get_seqno(engine);
        }
 
        i915_get_extra_instdone(dev, instdone);
 
 static inline bool i915_gem_request_started(struct drm_i915_gem_request *req,
                                           bool lazy_coherency)
 {
-       u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency);
-       return i915_seqno_passed(seqno, req->previous_seqno);
+       if (!lazy_coherency && req->engine->irq_seqno_barrier)
+               req->engine->irq_seqno_barrier(req->engine);
+       return i915_seqno_passed(req->engine->get_seqno(req->engine),
+                                req->previous_seqno);
 }
 
 static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
                                              bool lazy_coherency)
 {
-       u32 seqno = req->engine->get_seqno(req->engine, lazy_coherency);
-       return i915_seqno_passed(seqno, req->seqno);
+       if (!lazy_coherency && req->engine->irq_seqno_barrier)
+               req->engine->irq_seqno_barrier(req->engine);
+       return i915_seqno_passed(req->engine->get_seqno(req->engine),
+                                req->seqno);
 }
 
 int __must_check i915_gem_get_seqno(struct drm_device *dev, u32 *seqno);
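
With the barrier split out of get_seqno(), the lazy_coherency argument
reduces to "may I skip the barrier?". A minimal sketch of how a waiter
might combine the two modes (the helper below is hypothetical, not part
of this patch):

	/* Hypothetical sketch: spin briefly on the cheap, possibly-stale
	 * check, then force a coherent read before giving up.
	 */
	static int busy_wait_sketch(struct drm_i915_gem_request *req)
	{
		unsigned long timeout = jiffies + msecs_to_jiffies(10);

		do {
			/* lazy_coherency=true: plain status-page read */
			if (i915_gem_request_completed(req, true))
				return 0;
			cpu_relax();
		} while (time_before(jiffies, timeout));

		/* lazy_coherency=false: runs irq_seqno_barrier() first */
		return i915_gem_request_completed(req, false) ? 0 : -ETIME;
	}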
 
 
        ering->waiting = waitqueue_active(&engine->irq_queue);
        ering->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
-       ering->seqno = engine->get_seqno(engine, false);
        ering->acthd = intel_ring_get_active_head(engine);
+       ering->seqno = engine->get_seqno(engine);
        ering->last_seqno = engine->last_submitted_seqno;
        ering->start = I915_READ_START(engine);
        ering->head = I915_READ_HEAD(engine);
 
        if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES)
                return -1;
 
-       if (i915_seqno_passed(signaller->get_seqno(signaller, false), seqno))
+       if (i915_seqno_passed(signaller->get_seqno(signaller), seqno))
                return 1;
 
        /* cursory check for an unkickable deadlock */
 
                semaphore_clear_deadlocks(dev_priv);
 
-               seqno = engine->get_seqno(engine, false);
+               /* We don't strictly need an irq-barrier here, as we are not
+                * serving an interrupt request. However, be paranoid in case
+                * the barrier has side-effects (such as preventing a broken
+                * cacheline snoop) so that we are sure to see the seqno
+                * advance. If the seqno were to stick, due to a stale
+                * cacheline, we would erroneously declare the GPU hung.
+                */
+               if (engine->irq_seqno_barrier)
+                       engine->irq_seqno_barrier(engine);
+
                acthd = intel_ring_get_active_head(engine);
+               seqno = engine->get_seqno(engine);
 
                if (engine->hangcheck.seqno == seqno) {
                        if (ring_idle(engine, seqno)) {
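
For reference, ring_idle() above treats an engine as idle once the sampled
seqno has caught up with the last submitted one. A sketch of its
contemporary definition in i915_irq.c, reproduced approximately:

	static bool ring_idle(struct intel_engine_cs *engine, u32 seqno)
	{
		return list_empty(&engine->request_list) ||
		       i915_seqno_passed(seqno, engine->last_submitted_seqno);
	}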
 
            TP_fast_assign(
                           __entry->dev = engine->dev->primary->index;
                           __entry->ring = engine->id;
-                          __entry->seqno = engine->get_seqno(engine, false);
+                          __entry->seqno = engine->get_seqno(engine);
                           ),
 
            TP_printk("dev=%u, ring=%u, seqno=%u",
 
        return 0;
 }
 
-static u32 gen8_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+static u32 gen8_get_seqno(struct intel_engine_cs *engine)
 {
        return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }

 static void gen8_set_seqno(struct intel_engine_cs *engine, u32 seqno)
 {
        intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
 }
 
-static u32 bxt_a_get_seqno(struct intel_engine_cs *engine,
-                          bool lazy_coherency)
+static void bxt_a_seqno_barrier(struct intel_engine_cs *engine)
 {
-
        /*
         * On BXT A steppings there is a HW coherency issue whereby the
         * MI_STORE_DATA_IMM storing the completed request's seqno
         * occasionally doesn't invalidate the CPU cache. Work around this
         * by clflushing before the seqno read; that clflush pairs with
         * bxt_a_set_seqno(), where we also do a clflush after the write. So
         * this clflush in practice becomes an invalidate operation.
         */
-
-       if (!lazy_coherency)
-               intel_flush_status_page(engine, I915_GEM_HWS_INDEX);
-
-       return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
+       intel_flush_status_page(engine, I915_GEM_HWS_INDEX);
 }
 
 static void bxt_a_set_seqno(struct intel_engine_cs *engine, u32 seqno)
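
The new barrier leans entirely on intel_flush_status_page(). Its definition
in intel_ringbuffer.h (era of this patch, quoted approximately) is a clflush
of the single status-page dword, which on these steppings acts as the
missing invalidate:

	static inline void
	intel_flush_status_page(struct intel_engine_cs *engine, int reg)
	{
		drm_clflush_virt_range(&engine->status_page.page_addr[reg],
				       sizeof(u32));
	}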

        engine->irq_get = gen8_logical_ring_get_irq;
        engine->irq_put = gen8_logical_ring_put_irq;
        engine->emit_bb_start = gen8_emit_bb_start;
+       engine->get_seqno = gen8_get_seqno;
+       engine->set_seqno = gen8_set_seqno;
        if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
-               engine->get_seqno = bxt_a_get_seqno;
+               engine->irq_seqno_barrier = bxt_a_seqno_barrier;
                engine->set_seqno = bxt_a_set_seqno;
-       } else {
-               engine->get_seqno = gen8_get_seqno;
-               engine->set_seqno = gen8_set_seqno;
        }
 }
 
 
        return 0;
 }
 
-static u32
-gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+static void
+gen6_seqno_barrier(struct intel_engine_cs *engine)
 {
        /* Workaround to force correct ordering between irq and seqno writes on
         * ivb (and maybe also on snb) by reading from a CS register (like
         * ACTHD) before reading the status page.
         *
         * Note that this effectively stalls the read by the time it takes to
         * do a memory transaction, which more or less ensures that the write
         * from the GPU has sufficient time to invalidate the CPU cacheline.
         * Alternatively we could delay the interrupt from the CS ring to give
         * the write time to land, but that would incur a delay after every
         * batch i.e. much more frequent than a delay when waiting for the
         * interrupt (with the same net latency).
         */
-       if (!lazy_coherency) {
-               struct drm_i915_private *dev_priv = engine->dev->dev_private;
-               POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
-       }
-
-       return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
+       struct drm_i915_private *dev_priv = engine->dev->dev_private;
+       POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
 }
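
POSTING_READ_FW() supplies the stalling read without touching the forcewake
accounting; the value is discarded and only the ordering effect of the
uncached mmio read matters. Approximately, per i915_drv.h of this era:

	#define I915_READ_FW(reg__)	__raw_i915_read32(dev_priv, (reg__))
	#define POSTING_READ_FW(reg__)	(void)I915_READ_FW(reg__)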
 
 static u32
-ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+ring_get_seqno(struct intel_engine_cs *engine)
 {
        return intel_read_status_page(engine, I915_GEM_HWS_INDEX);
 }

 }
 
 static u32
-pc_render_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
+pc_render_get_seqno(struct intel_engine_cs *engine)
 {
        return engine->scratch.cpu_page[0];
 }

                engine->irq_get = gen8_ring_get_irq;
                engine->irq_put = gen8_ring_put_irq;
                engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-               engine->get_seqno = gen6_ring_get_seqno;
+               engine->irq_seqno_barrier = gen6_seqno_barrier;
+               engine->get_seqno = ring_get_seqno;
                engine->set_seqno = ring_set_seqno;
                if (i915_semaphore_is_enabled(dev)) {
                        WARN_ON(!dev_priv->semaphore_obj);

                engine->irq_get = gen6_ring_get_irq;
                engine->irq_put = gen6_ring_put_irq;
                engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
-               engine->get_seqno = gen6_ring_get_seqno;
+               engine->irq_seqno_barrier = gen6_seqno_barrier;
+               engine->get_seqno = ring_get_seqno;
                engine->set_seqno = ring_set_seqno;
                if (i915_semaphore_is_enabled(dev)) {
                        engine->semaphore.sync_to = gen6_ring_sync;

                        engine->write_tail = gen6_bsd_ring_write_tail;
                engine->flush = gen6_bsd_ring_flush;
                engine->add_request = gen6_add_request;
-               engine->get_seqno = gen6_ring_get_seqno;
+               engine->irq_seqno_barrier = gen6_seqno_barrier;
+               engine->get_seqno = ring_get_seqno;
                engine->set_seqno = ring_set_seqno;
                if (INTEL_INFO(dev)->gen >= 8) {
                        engine->irq_enable_mask =

        engine->mmio_base = GEN8_BSD2_RING_BASE;
        engine->flush = gen6_bsd_ring_flush;
        engine->add_request = gen6_add_request;
-       engine->get_seqno = gen6_ring_get_seqno;
+       engine->irq_seqno_barrier = gen6_seqno_barrier;
+       engine->get_seqno = ring_get_seqno;
        engine->set_seqno = ring_set_seqno;
        engine->irq_enable_mask =
                        GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;

        engine->write_tail = ring_write_tail;
        engine->flush = gen6_ring_flush;
        engine->add_request = gen6_add_request;
-       engine->get_seqno = gen6_ring_get_seqno;
+       engine->irq_seqno_barrier = gen6_seqno_barrier;
+       engine->get_seqno = ring_get_seqno;
        engine->set_seqno = ring_set_seqno;
        if (INTEL_INFO(dev)->gen >= 8) {
                engine->irq_enable_mask =

        engine->write_tail = ring_write_tail;
        engine->flush = gen6_ring_flush;
        engine->add_request = gen6_add_request;
-       engine->get_seqno = gen6_ring_get_seqno;
+       engine->irq_seqno_barrier = gen6_seqno_barrier;
+       engine->get_seqno = ring_get_seqno;
        engine->set_seqno = ring_set_seqno;
 
        if (INTEL_INFO(dev)->gen >= 8) {
 
         * seen value is good enough. Note that the seqno will always be
         * monotonic, even if not coherent.
         */
-       u32             (*get_seqno)(struct intel_engine_cs *ring,
-                                    bool lazy_coherency);
+       void            (*irq_seqno_barrier)(struct intel_engine_cs *ring);
+       u32             (*get_seqno)(struct intel_engine_cs *ring);
        void            (*set_seqno)(struct intel_engine_cs *ring,
                                     u32 seqno);
        int             (*dispatch_execbuffer)(struct drm_i915_gem_request *req,
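
Taken together, a coherent query is always the optional barrier followed by
the plain read. A hypothetical helper (not introduced by this patch) makes
the contract explicit:

	static inline u32
	intel_engine_get_seqno_coherent(struct intel_engine_cs *engine)
	{
		/* Kick chipsets whose last seqno write may still be sitting
		 * in a stale cacheline, then read the status page.
		 */
		if (engine->irq_seqno_barrier)
			engine->irq_seqno_barrier(engine);
		return engine->get_seqno(engine);
	}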