cmd = MI_FLUSH_DW + 1;
 
-       if (ring == &dev_priv->ring[VCS]) {
-               if (invalidate_domains & I915_GEM_GPU_DOMAINS)
-                       cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-                               MI_FLUSH_DW_STORE_INDEX |
-                               MI_FLUSH_DW_OP_STOREDW;
-       } else {
-               if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
-                       cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-                               MI_FLUSH_DW_OP_STOREDW;
+       /* We always require a command barrier so that subsequent
+        * commands, such as breadcrumb interrupts, are strictly ordered
+        * wrt the contents of the write cache being flushed to memory
+        * (and thus being coherent from the CPU).
+        */
+       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
+       if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
+               cmd |= MI_INVALIDATE_TLB;
+               if (ring == &dev_priv->ring[VCS])
+                       cmd |= MI_INVALIDATE_BSD;
        }
 
        intel_logical_ring_emit(ringbuf, cmd);
 
        cmd = MI_FLUSH_DW;
        if (INTEL_INFO(ring->dev)->gen >= 8)
                cmd += 1;
+
+       /* We always require a command barrier so that subsequent
+        * commands, such as breadcrumb interrupts, are strictly ordered
+        * wrt the contents of the write cache being flushed to memory
+        * (and thus being coherent from the CPU).
+        */
+       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
        /*
         * Bspec vol 1c.5 - video engine command streamer:
         * "If ENABLED, all TLBs will be invalidated once the flush
         * Post-Sync Operation field is a value of 1h or 3h."
         */
        if (invalidate & I915_GEM_GPU_DOMAINS)
-               cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
-                       MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+               cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
+
        intel_ring_emit(ring, cmd);
        intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
        if (INTEL_INFO(ring->dev)->gen >= 8) {
        cmd = MI_FLUSH_DW;
        if (INTEL_INFO(ring->dev)->gen >= 8)
                cmd += 1;
+
+       /* We always require a command barrier so that subsequent
+        * commands, such as breadcrumb interrupts, are strictly ordered
+        * wrt the contents of the write cache being flushed to memory
+        * (and thus being coherent from the CPU).
+        */
+       cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
+
        /*
         * Bspec vol 1c.3 - blitter engine command streamer:
         * "If ENABLED, all TLBs will be invalidated once the flush
         * Post-Sync Operation field is a value of 1h or 3h."
         */
        if (invalidate & I915_GEM_DOMAIN_RENDER)
-               cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-                       MI_FLUSH_DW_OP_STOREDW;
+               cmd |= MI_INVALIDATE_TLB;
        intel_ring_emit(ring, cmd);
        intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
        if (INTEL_INFO(ring->dev)->gen >= 8) {