/* Queue the GPU write cache flushing we need. */
        old_write_domain = obj->write_domain;
        i915_gem_flush(dev, 0, obj->write_domain);
+       BUG_ON(obj->write_domain);
 
        trace_i915_gem_object_change_domain(obj,
                                            obj->read_domains,
  * wait, as in modesetting process we're not supposed to be interrupted.
  */
 int
-i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
+i915_gem_object_set_to_display_plane(struct drm_gem_object *obj,
+                                    bool pipelined)
 {
        struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
        uint32_t old_read_domains;
        if (obj_priv->gtt_space == NULL)
                return -EINVAL;
 
-       ret = i915_gem_object_flush_gpu_write_domain(obj, true);
-       if (ret != 0)
+       ret = i915_gem_object_flush_gpu_write_domain(obj, pipelined);
+       if (ret)
                return ret;
 
        i915_gem_object_flush_cpu_write_domain(obj);
 
 }
 
 int
-intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_gem_object *obj)
+intel_pin_and_fence_fb_obj(struct drm_device *dev,
+                          struct drm_gem_object *obj,
+                          bool pipelined)
 {
        struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
        u32 alignment;
        }
 
        ret = i915_gem_object_pin(obj, alignment);
-       if (ret != 0)
+       if (ret)
                return ret;
 
-       ret = i915_gem_object_set_to_display_plane(obj);
-       if (ret != 0) {
-               i915_gem_object_unpin(obj);
-               return ret;
-       }
+       ret = i915_gem_object_set_to_display_plane(obj, pipelined);
+       if (ret)
+               goto err_unpin;
 
        /* Install a fence for tiled scan-out. Pre-i965 always needs a
         * fence, whereas 965+ only requires a fence if using
        if (obj_priv->fence_reg == I915_FENCE_REG_NONE &&
            obj_priv->tiling_mode != I915_TILING_NONE) {
                ret = i915_gem_object_get_fence_reg(obj);
-               if (ret != 0) {
-                       i915_gem_object_unpin(obj);
-                       return ret;
-               }
+               if (ret)
+                       goto err_unpin;
        }
 
        return 0;
+
+err_unpin:
+       i915_gem_object_unpin(obj);
+       return ret;
 }
 
 /* Assume fb object is pinned & idle & fenced and just update base pointers */
        obj_priv = to_intel_bo(obj);
 
        mutex_lock(&dev->struct_mutex);
-       ret = intel_pin_and_fence_fb_obj(dev, obj);
+       ret = intel_pin_and_fence_fb_obj(dev, obj, false);
        if (ret != 0) {
                mutex_unlock(&dev->struct_mutex);
                return ret;
        struct intel_unpin_work *work;
        unsigned long flags, offset;
        int pipe = intel_crtc->pipe;
-       u32 pf, pipesrc;
+       u32 was_dirty, pf, pipesrc;
        int ret;
 
        work = kzalloc(sizeof *work, GFP_KERNEL);
        obj = intel_fb->obj;
 
        mutex_lock(&dev->struct_mutex);
-       ret = intel_pin_and_fence_fb_obj(dev, obj);
+       was_dirty = obj->write_domain & I915_GEM_GPU_DOMAINS;
+       ret = intel_pin_and_fence_fb_obj(dev, obj, true);
        if (ret)
                goto cleanup_work;
 
        atomic_inc(&obj_priv->pending_flip);
        work->pending_flip_obj = obj;
 
-       if (IS_GEN3(dev) || IS_GEN2(dev)) {
-               u32 flip_mask;
+       if (was_dirty || IS_GEN3(dev) || IS_GEN2(dev)) {
+               BEGIN_LP_RING(2);
+               if (IS_GEN3(dev) || IS_GEN2(dev)) {
+                       u32 flip_mask;
 
-               if (intel_crtc->plane)
-                       flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
-               else
-                       flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+                       /* Can't queue multiple flips, so wait for the previous
+                        * one to finish before executing the next.
+                        */
 
-               BEGIN_LP_RING(2);
-               OUT_RING(MI_WAIT_FOR_EVENT | flip_mask);
-               OUT_RING(0);
+                       if (intel_crtc->plane)
+                               flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
+                       else
+                               flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;
+
+                       OUT_RING(MI_WAIT_FOR_EVENT | flip_mask);
+               } else
+                       OUT_RING(MI_NOOP);
+               OUT_RING(MI_FLUSH);
                ADVANCE_LP_RING();
        }