int shmem_page_offset, page_length, ret = 0;
        int obj_do_bit17_swizzling, page_do_bit17_swizzling;
        int hit_slowpath = 0;
+       int needs_clflush = 0;
 
        user_data = (char __user *) (uintptr_t) args->data_ptr;
        remain = args->size;
 
        obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
 
+       if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
+               /* If we're not in the cpu read domain, move the object into
+                * the gtt read domain and manually flush cachelines (if
+                * required). This optimizes for the case when the gpu will
+                * dirty the data again anyway before the next pread happens. */
+               if (obj->cache_level == I915_CACHE_NONE)
+                       needs_clflush = 1;
+               ret = i915_gem_object_set_to_gtt_domain(obj, false);
+               if (ret)
+                       return ret;
+       }
+
        offset = args->offset;
 
        while (remain > 0) {
 
                if (!page_do_bit17_swizzling) {
                        vaddr = kmap_atomic(page);
+                       if (needs_clflush)
+                               drm_clflush_virt_range(vaddr + shmem_page_offset,
+                                                      page_length);
                        ret = __copy_to_user_inatomic(user_data,
                                                      vaddr + shmem_page_offset,
                                                      page_length);
                mutex_unlock(&dev->struct_mutex);
 
                vaddr = kmap(page);
+               if (needs_clflush)
+                       drm_clflush_virt_range(vaddr + shmem_page_offset,
+                                              page_length);
+
                if (page_do_bit17_swizzling)
                        ret = __copy_to_user_swizzled(user_data,
                                                      vaddr, shmem_page_offset,
 
        trace_i915_gem_object_pread(obj, args->offset, args->size);
 
-       ret = i915_gem_object_set_cpu_read_domain_range(obj,
-                                                       args->offset,
-                                                       args->size);
-       if (ret)
-               goto out;
-
        ret = i915_gem_shmem_pread(dev, obj, args, file);
 
 out: