For ease of use (i.e. avoiding a few checks and function calls), store
the object's cache coherency next to the cache-is-dirty bit.
Specifically, this patch aims to reduce the frequency of no-op calls to
i915_gem_clflush_object() to counteract the increase of such calls for
GPU-only objects in the previous patch.
v2: Replace cache_dirty & ~cache_coherent with cache_dirty &&
!cache_coherent, as gcc generates much better code for the latter
(Tvrtko).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Dongwon Kim <dongwon.kim@intel.com>
Cc: Matt Roper <matthew.d.roper@intel.com>
Tested-by: Dongwon Kim <dongwon.kim@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170616105455.16977-1-chris@chris-wilson.co.uk
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
        if (obj->cache_dirty)
                return false;
 
-       if (!i915_gem_object_is_coherent(obj))
+       if (!obj->cache_coherent)
                return true;
 
        return obj->pin_display;
 
        if (needs_clflush &&
            (obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
-           !i915_gem_object_is_coherent(obj))
+           !obj->cache_coherent)
                drm_clflush_sg(pages);
 
        __start_cpu_write(obj);
        if (ret)
                return ret;
 
-       if (i915_gem_object_is_coherent(obj) ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+       if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, false);
                if (ret)
                        goto err_unpin;
        if (ret)
                return ret;
 
-       if (i915_gem_object_is_coherent(obj) ||
-           !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+       if (obj->cache_coherent || !static_cpu_has(X86_FEATURE_CLFLUSH)) {
                ret = i915_gem_object_set_to_cpu_domain(obj, true);
                if (ret)
                        goto err_unpin;
        list_for_each_entry(vma, &obj->vma_list, obj_link)
                vma->node.color = cache_level;
        obj->cache_level = cache_level;
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
        obj->cache_dirty = true; /* Always invalidate stale cachelines */
 
        return 0;
        } else
                obj->cache_level = I915_CACHE_NONE;
 
-       obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = !obj->cache_coherent;
 
        trace_i915_gem_object_create(obj);
 
 
         * snooping behaviour occurs naturally as the result of our domain
         * tracking.
         */
-       if (!(flags & I915_CLFLUSH_FORCE) && i915_gem_object_is_coherent(obj))
+       if (!(flags & I915_CLFLUSH_FORCE) && obj->cache_coherent)
                return;
 
        trace_i915_gem_object_clflush(obj);
 
                if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
                        continue;
 
-               if (obj->cache_dirty)
+               if (unlikely(obj->cache_dirty && !obj->cache_coherent))
                        i915_gem_clflush_object(obj, 0);
 
                ret = i915_gem_request_await_object
 
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
-       obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = !obj->cache_coherent;
 
        return obj;
 }
 
        unsigned long gt_ro:1;
        unsigned int cache_level:3;
        unsigned int cache_dirty:1;
+       unsigned int cache_coherent:1;
 
        atomic_t frontbuffer_bits;
        unsigned int frontbuffer_ggtt_origin; /* write once */
 
        obj->stolen = stolen;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
        obj->cache_level = HAS_LLC(dev_priv) ? I915_CACHE_LLC : I915_CACHE_NONE;
+       obj->cache_coherent = true; /* assumptions! more like cache_oblivious */
 
        if (i915_gem_object_pin_pages(obj))
                goto cleanup;
 
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->cache_level = I915_CACHE_LLC;
-       obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = !obj->cache_coherent;
 
        obj->userptr.ptr = args->user_ptr;
        obj->userptr.read_only = !!(args->flags & I915_USERPTR_READ_ONLY);
 
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
-       obj->cache_dirty = !i915_gem_object_is_coherent(obj);
+       obj->cache_coherent = i915_gem_object_is_coherent(obj);
+       obj->cache_dirty = !obj->cache_coherent;
        obj->scratch = phys_size;
 
        return obj;