Consider two tasks that are running in parallel on a pair of engines
(vcs0, vcs1), but then must complete on a shared engine (rcs0). To
maximise throughput, we want to run the first ready task on rcs0 (i.e.
the first task that completes on either of vcs0 or vcs1). When using
semaphores, however, we will instead queue onto rcs0 in submission order.
To resolve this incorrect ordering, we want to re-evaluate the priority
queue when each of the requests is ready. Normally this happens because
we only insert into the priority queue requests that are ready, but with
semaphores we are inserting ahead of their readiness and to compensate
we penalize those tasks with reduced priority (so that tasks that do not
need to busywait should naturally be run first). However, given a series
of tasks that each use semaphores, the queue degrades into submission
fifo rather than readiness fifo, and so to counter this we give a small
boost to semaphore users as their dependent tasks are completed (and so
we no longer require any busywait prior to running the user task as they
are then ready themselves).
v2: Fixup irqsave for schedule_lock (Tvrtko)
Testcase: igt/gem_exec_schedule/semaphore-codependency
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Dmitry Ermilov <dmitry.ermilov@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190409152922.23894-1-chris@chris-wilson.co.uk
        return NOTIFY_DONE;
 }
 
+static int __i915_sw_fence_call
+semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+       struct i915_request *request =
+               container_of(fence, typeof(*request), semaphore);
+
+       switch (state) {
+       case FENCE_COMPLETE:
+               /*
+                * The fences that request->semaphore was awaiting have
+                * completed, so this request should no longer need to
+                * busywait on them; respond with a priority bump so
+                * the queue tends towards readiness order rather than
+                * submission order.
+                *
+                * We only check a small portion of our dependencies
+                * and so cannot guarantee that there remains no
+                * semaphore chain across all. Instead of opting
+                * for the full NOSEMAPHORE boost, we go for the
+                * smaller (but still preempting) boost of
+                * NEWCLIENT. This will be enough to boost over
+                * a busywaiting request (as that cannot be
+                * NEWCLIENT) without accidentally boosting
+                * a busywait over real work elsewhere.
+                */
+               i915_schedule_bump_priority(request, I915_PRIORITY_NEWCLIENT);
+               break;
+
+       case FENCE_FREE:
+               i915_request_put(request); /* drop the ref taken at fence init */
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
 static void ring_retire_requests(struct intel_ring *ring)
 {
        struct i915_request *rq, *rn;
 
        /* We bump the ref for the fence chain */
        i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
+       i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify);
 
        i915_sched_node_init(&rq->sched);
 
                                                     &from->fence, 0,
                                                     I915_FENCE_GFP);
 
+       err = i915_sw_fence_await_dma_fence(&to->semaphore,
+                                           &from->fence, 0,
+                                           I915_FENCE_GFP);
+       if (err < 0)
+               return err;
+
        /* We need to pin the signaler's HWSP until we are finished reading. */
        err = i915_timeline_read_hwsp(from, to, &hwsp_offset);
        if (err)
         * run at the earliest possible convenience.
         */
        local_bh_disable();
+       i915_sw_fence_commit(&request->semaphore);
        rcu_read_lock(); /* RCU serialisation for set-wedged protection */
        if (engine->schedule) {
                struct i915_sched_attr attr = request->gem_context->sched;
        if (flags & I915_WAIT_PRIORITY) {
                if (!i915_request_started(rq) && INTEL_GEN(rq->i915) >= 6)
                        gen6_rps_boost(rq);
+               local_bh_disable(); /* suspend tasklets for reprioritisation */
                i915_schedule_bump_priority(rq, I915_PRIORITY_WAIT);
+               local_bh_enable(); /* kick tasklets en masse */
        }
 
        wait.tsk = current;
 
                struct i915_sw_dma_fence_cb dmaq;
        };
        struct list_head execute_cb;
+       struct i915_sw_fence semaphore;
 
        /*
         * A list of everyone we wait upon, and everyone who waits upon us.
 
 {
        bool ret = false;
 
-       spin_lock(&schedule_lock);
+       spin_lock_irq(&schedule_lock);
 
        if (!node_signaled(signal)) {
                INIT_LIST_HEAD(&dep->dfs_link);
                ret = true;
        }
 
-       spin_unlock(&schedule_lock);
+       spin_unlock_irq(&schedule_lock);
 
        return ret;
 }
 
        GEM_BUG_ON(!list_empty(&node->link));
 
-       spin_lock(&schedule_lock);
+       spin_lock_irq(&schedule_lock);
 
        /*
         * Everyone we depended upon (the fences we wait to be signaled)
                        i915_dependency_free(dep);
        }
 
-       spin_unlock(&schedule_lock);
+       spin_unlock_irq(&schedule_lock);
 }
 
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 
        memset(&cache, 0, sizeof(cache));
        engine = rq->engine;
-       spin_lock_irq(&engine->timeline.lock);
+       spin_lock(&engine->timeline.lock);
 
        /* Fifo and depth-first replacement ensure our deps execute before us */
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                tasklet_hi_schedule(&engine->execlists.tasklet);
        }
 
-       spin_unlock_irq(&engine->timeline.lock);
+       spin_unlock(&engine->timeline.lock);
 }
 
 void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
 {
-       spin_lock(&schedule_lock);
+       spin_lock_irq(&schedule_lock); /* schedule_lock is held with irqs off */
        __i915_schedule(rq, attr);
-       spin_unlock(&schedule_lock);
+       spin_unlock_irq(&schedule_lock);
 }
 
 void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump)
 {
        struct i915_sched_attr attr;
+       unsigned long flags; /* irq state saved/restored around schedule_lock */
 
        GEM_BUG_ON(bump & ~I915_PRIORITY_MASK);
 
        if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID)
                return;
 
-       spin_lock_bh(&schedule_lock);
+       spin_lock_irqsave(&schedule_lock, flags); /* called from semaphore_notify() */
 
        attr = rq->sched.attr;
        attr.priority |= bump;
        __i915_schedule(rq, &attr);
 
-       spin_unlock_bh(&schedule_lock);
+       spin_unlock_irqrestore(&schedule_lock, flags);
 }
 
 void __i915_priolist_free(struct i915_priolist *p)