return fence - dev_priv->fence_regs;
 }
 
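+/* IPI callback: wbinvd() writes back and invalidates all caches on the
+ * CPU it runs on, so any dirty lines covering the fenced object reach
+ * memory before the fence register is rewritten.
+ */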
+static void i915_gem_write_fence__ipi(void *data)
+{
+       wbinvd();
+}
+
 static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
                                         bool enable)
 {
-       struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
-       int reg = fence_number(dev_priv, fence);
-
-       i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL);
+       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       int fence_reg = fence_number(dev_priv, fence);
+
+       /* In order to fully serialize access to the fenced region and
+        * the update to the fence register we need to take extreme
+        * measures on SNB+. In theory, the write to the fence register
+        * flushes all memory transactions before it, and coupled with the
+        * mb() placed around the register write we serialize all memory
+        * operations with respect to the changes in the tiler. Yet, on
+        * SNB+ we need to go a step further and emit an explicit wbinvd()
+        * on each processor in order to manually flush all memory
+        * transactions before updating the fence register.
+        */
+       if (HAS_LLC(dev))
+               on_each_cpu(i915_gem_write_fence__ipi, NULL, 1);
+       i915_gem_write_fence(dev, fence_reg, enable ? obj : NULL);
 
        if (enable) {
-               obj->fence_reg = reg;
+               obj->fence_reg = fence_reg;
                fence->obj = obj;
                list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list);
        } else {