ring->tail, ring->size);
 }
 
-static void __intel_engine_submit(struct intel_engine_cs *engine)
-{
-       struct intel_ring *ring = engine->buffer;
-
-       ring->tail &= ring->size - 1;
-       engine->write_tail(engine, ring->tail);
-}
-
 static int
 gen2_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 {
        return gen8_emit_pipe_control(req, flags, scratch_addr);
 }
 
-static void ring_write_tail(struct intel_engine_cs *engine,
-                           u32 value)
-{
-       struct drm_i915_private *dev_priv = engine->i915;
-       I915_WRITE_TAIL(engine, value);
-}
-
 u64 intel_engine_get_active_head(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *dev_priv = engine->i915;
 
        I915_WRITE_CTL(engine, 0);
        I915_WRITE_HEAD(engine, 0);
-       engine->write_tail(engine, 0);
+       I915_WRITE_TAIL(engine, 0);
 
        if (!IS_GEN2(dev_priv)) {
                (void)I915_READ_CTL(engine);
        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
        intel_ring_emit(ring, req->fence.seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
-       __intel_engine_submit(engine);
+       intel_ring_advance(ring);
+
+       req->tail = ring->tail;
+       engine->submit_request(req);
 
        return 0;
 }
        intel_ring_emit(ring, 0);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
        intel_ring_emit(ring, MI_NOOP);
-       __intel_engine_submit(engine);
+
+       req->tail = ring->tail;
+       engine->submit_request(req);
 
        return 0;
 }
        intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
        intel_ring_emit(ring, req->fence.seqno);
        intel_ring_emit(ring, MI_USER_INTERRUPT);
-       __intel_engine_submit(req->engine);
+       intel_ring_advance(ring);
+
+       req->tail = ring->tail;
+       req->engine->submit_request(req);
 
        return 0;
 }
 
+/* Submit a completed request to hardware on gen2-gen5 (and gen6+ non-BSD)
+ * engines by writing its recorded tail offset into the ring's TAIL register.
+ * NOTE(review): dev_priv looks unused, but it is presumably consumed
+ * implicitly by the I915_WRITE_TAIL() macro (the removed ring_write_tail()
+ * used the exact same pattern) — confirm against i915_drv.h.
+ */
+static void i9xx_submit_request(struct drm_i915_gem_request *request)
+{
+       struct drm_i915_private *dev_priv = request->i915;
+
+       I915_WRITE_TAIL(request->engine, request->tail);
+}
+
 static void
 gen6_irq_enable(struct intel_engine_cs *engine)
 {
        rcu_read_unlock();
 }
 
-static void gen6_bsd_ring_write_tail(struct intel_engine_cs *engine,
-                                    u32 value)
+static void gen6_bsd_submit_request(struct drm_i915_gem_request *request)
 {
-       struct drm_i915_private *dev_priv = engine->i915;
+       struct drm_i915_private *dev_priv = request->i915;
 
        intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
                DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
 
        /* Now that the ring is fully powered up, update the tail */
-       I915_WRITE_FW(RING_TAIL(engine->mmio_base), value);
-       POSTING_READ_FW(RING_TAIL(engine->mmio_base));
+       I915_WRITE_FW(RING_TAIL(request->engine->mmio_base), request->tail);
+       POSTING_READ_FW(RING_TAIL(request->engine->mmio_base));
 
        /* Let the ring send IDLE messages to the GT again,
         * and so let it sleep to conserve power when idle.
                                      struct intel_engine_cs *engine)
 {
        engine->init_hw = init_ring_common;
-       engine->write_tail = ring_write_tail;
+       engine->submit_request = i9xx_submit_request;
 
        engine->add_request = i9xx_add_request;
        if (INTEL_GEN(dev_priv) >= 6)
        if (INTEL_GEN(dev_priv) >= 6) {
                /* gen6 bsd needs a special wa for tail updates */
                if (IS_GEN6(dev_priv))
-                       engine->write_tail = gen6_bsd_ring_write_tail;
+                       engine->submit_request = gen6_bsd_submit_request;
                engine->emit_flush = gen6_bsd_ring_flush;
                if (INTEL_GEN(dev_priv) < 8)
                        engine->irq_enable_mask = GT_BSD_USER_INTERRUPT;
 
 
        int             (*init_context)(struct drm_i915_gem_request *req);
 
-       void            (*write_tail)(struct intel_engine_cs *engine,
-                                     u32 value);
        int             (*add_request)(struct drm_i915_gem_request *req);
        /* Some chipsets are not quite as coherent as advertised and need
         * an expensive kick to force a true read of the up-to-date seqno.
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
 #define I915_DISPATCH_RS     0x4
+       void            (*submit_request)(struct drm_i915_gem_request *req);
 
        /**
         * List of objects currently involved in rendering from the
 
+/* Finish command emission: wrap the software tail back into [0, size).
+ * Relies on ring->size being a power of two so the AND acts as a modulus.
+ */
 static inline void intel_ring_advance(struct intel_ring *ring)
 {
+       /* The modulus is required so that we avoid writing
+        * request->tail == ring->size, rather than the expected 0,
+        * into the RING_TAIL register as that can cause a GPU hang.
+        * As this is only strictly required for the request->tail,
+        * and only then as we write the value into hardware, we can
+        * one day remove the modulus after every command packet.
+        */
        ring->tail &= ring->size - 1;
 }