On Braswell and Broxton (also known as Valleyview and Apollolake), we
need to serialise updates of the GGTT using the big stop_machine()
hammer. This has the side effect of appearing to lockdep as a possible
reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu
allocations). However, we want to use vm->mutex (including ggtt->mutex)
from within the shrinker and so must avoid such possible taints. For this
purpose, we introduced the asynchronous vma binding and we can apply it
to PIN_GLOBAL so long as we take care to add the necessary waits for
the worker afterwards.
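
Concretely, callers converted to i915_ggtt_pin() keep their synchronous
expectations, because that helper now flushes the bind worker itself on
success (see the do/while hunk near the end of this patch). A caller that
still uses i915_vma_pin() with PIN_GLOBAL and needs the PTEs in place
before proceeding has to wait explicitly. A minimal sketch of that
pattern, mirroring the i915_vma_wait_for_bind() calls added below (the
flags are only illustrative; the surrounding error paths are elided):

        err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (err)
                return err;

        /* The GGTT bind may have been queued to a worker; flush it */
        err = i915_vma_wait_for_bind(vma);
        if (err) {
                i915_vma_unpin(vma);
                return err;
        }
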
Closes: https://gitlab.freedesktop.org/drm/intel/issues/211
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200130181710.2030251-3-chris@chris-wilson.co.uk
 
 {
        unsigned int flags;
 
-       flags = PIN_GLOBAL;
        if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt))
                /*
                 * On g33, we cannot place HWS above 256MiB, so
                 * keep it within the mappable region (even though we never
                 * actually map it).
                 */
-               flags |= PIN_MAPPABLE;
+               flags = PIN_MAPPABLE;
        else
-               flags |= PIN_HIGH;
+               flags = PIN_HIGH;
 
-       return i915_vma_pin(vma, 0, 0, flags);
+       return i915_ggtt_pin(vma, 0, flags);
 }
 
 static int init_status_page(struct intel_engine_cs *engine)
 
 
 void i915_ggtt_suspend(struct i915_ggtt *ggtt)
 {
+       struct i915_vma *vma;
+
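+       /* Flush any pending async GGTT binds before clearing the PTEs */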
+       list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
+               i915_vma_wait_for_bind(vma);
+
        ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
        ggtt->invalidate(ggtt);
 
            IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) {
                ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
                ggtt->vm.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
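+               /*
+                * These updates require the stop_machine() hammer, so mark
+                * global binds as asynchronous and let the bind worker run it.
+                */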
+               ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND;
        }
 
        ggtt->invalidate = gen8_ggtt_invalidate;
 
                goto err_unref;
        }
 
-       ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       ret = i915_ggtt_pin(vma, 0, PIN_HIGH);
        if (ret)
                goto err_unref;
 
 
                goto err;
        }
 
-       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       err = i915_ggtt_pin(vma, 0, PIN_HIGH);
        if (err)
                goto err;
 
 
        if (atomic_fetch_inc(&ring->pin_count))
                return 0;
 
-       flags = PIN_GLOBAL;
-
        /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-       flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+       flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
 
        if (vma->obj->stolen)
                flags |= PIN_MAPPABLE;
        else
                flags |= PIN_HIGH;
 
-       ret = i915_vma_pin(vma, 0, 0, flags);
+       ret = i915_ggtt_pin(vma, 0, flags);
        if (unlikely(ret))
                goto err_unpin;
 
 
        if (atomic_add_unless(&tl->pin_count, 1, 0))
                return 0;
 
-       err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       err = i915_ggtt_pin(tl->hwsp_ggtt, 0, PIN_HIGH);
        if (err)
                return err;
 
                goto err_rollback;
        }
 
-       err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+       err = i915_ggtt_pin(vma, 0, PIN_HIGH);
        if (err) {
                __idle_hwsp_free(vma->private, cacheline);
                goto err_rollback;
 
        if (IS_ERR(vma))
                goto err;
 
-       flags = PIN_GLOBAL | PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
-       ret = i915_vma_pin(vma, 0, 0, flags);
+       flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma);
+       ret = i915_ggtt_pin(vma, 0, flags);
        if (ret) {
                vma = ERR_PTR(ret);
                goto err;
 
        return err;
 }
 
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
 {
+       struct dma_fence *prev;
+
        /* We expect the caller to manage the exclusive timeline ordering */
        GEM_BUG_ON(i915_active_is_idle(ref));
 
-       if (!__i915_active_fence_set(&ref->excl, f))
+       prev = __i915_active_fence_set(&ref->excl, f);
+       if (!prev)
                atomic_inc(&ref->count);
+
+       return prev;
 }
 
 bool i915_active_acquire_if_busy(struct i915_active *ref)
 
        return i915_active_ref(ref, i915_request_timeline(rq), &rq->fence);
 }
 
-void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
+struct dma_fence *
+i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f);
 
 static inline bool i915_active_has_exclusive(struct i915_active *ref)
 {
 
        if (ret)
                return ERR_PTR(ret);
 
+       ret = i915_vma_wait_for_bind(vma);
+       if (ret) {
+               i915_vma_unpin(vma);
+               return ERR_PTR(ret);
+       }
+
        return vma;
 }
 
 
        struct dma_fence_work base;
        struct i915_vma *vma;
        struct drm_i915_gem_object *pinned;
+       struct i915_sw_dma_fence_cb cb;
        enum i915_cache_level cache_level;
        unsigned int flags;
 };
        return vw;
 }
 
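+/*
+ * Wait for the vma's asynchronous binding, if any, to complete: the bind
+ * worker is tracked as the exclusive fence on vma->active.
+ */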
+int i915_vma_wait_for_bind(struct i915_vma *vma)
+{
+       int err = 0;
+
+       if (rcu_access_pointer(vma->active.excl.fence)) {
+               struct dma_fence *fence;
+
+               rcu_read_lock();
+               fence = dma_fence_get_rcu_safe(&vma->active.excl.fence);
+               rcu_read_unlock();
+               if (fence) {
+                       err = dma_fence_wait(fence, MAX_SCHEDULE_TIMEOUT);
+                       dma_fence_put(fence);
+               }
+       }
+
+       return err;
+}
+
 /**
  * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
  * @vma: VMA to map
 
        trace_i915_vma_bind(vma, bind_flags);
        if (work && (bind_flags & ~vma_flags) & vma->vm->bind_async_flags) {
+               struct dma_fence *prev;
+
                work->vma = vma;
                work->cache_level = cache_level;
                work->flags = bind_flags | I915_VMA_ALLOC;
                 * part of the obj->resv->excl_fence as it only affects
                 * execution and not content or object's backing store lifetime.
                 */
-               GEM_BUG_ON(i915_active_has_exclusive(&vma->active));
-               i915_active_set_exclusive(&vma->active, &work->base.dma);
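+               /*
+                * Order this bind after any fence already installed as the
+                * vma's exclusive fence (e.g. an earlier bind still running
+                * on the worker).
+                */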
+               prev = i915_active_set_exclusive(&vma->active, &work->base.dma);
+               if (prev)
+                       __i915_sw_fence_await_dma_fence(&work->base.chain,
+                                                       prev,
+                                                       &work->cb);
+
                work->base.dma.error = 0; /* enable the queue_work() */
 
                if (vma->obj) {
                        work->pinned = vma->obj;
                }
        } else {
-               GEM_BUG_ON((bind_flags & ~vma_flags) & vma->vm->bind_async_flags);
                ret = vma->ops->bind_vma(vma, cache_level, bind_flags);
                if (ret)
                        return ret;
 
        do {
                err = i915_vma_pin(vma, 0, align, flags | PIN_GLOBAL);
-               if (err != -ENOSPC)
+               if (err != -ENOSPC) {
+                       if (!err) {
+                               err = i915_vma_wait_for_bind(vma);
+                               if (err)
+                                       i915_vma_unpin(vma);
+                       }
                        return err;
+               }
 
                /* Unlike i915_vma_pin, we don't take no for an answer! */
                flush_idle_contexts(vm->gt);
 
 void i915_vma_make_shrinkable(struct i915_vma *vma);
 void i915_vma_make_purgeable(struct i915_vma *vma);
 
+int i915_vma_wait_for_bind(struct i915_vma *vma);
+
 static inline int i915_vma_sync(struct i915_vma *vma)
 {
        /* Wait for the asynchronous bindings and pending GPU reads */