#define GUC_REQUEST_SIZE 64 /* bytes */
 
+/*
+ * Below is a set of functions which control the GuC scheduling state which
+ * do not require a lock as all state transitions are mutually exclusive,
+ * i.e. it is not possible for the context pinning code and submission, for
+ * the same context, to be executing simultaneously. We still need an atomic
+ * as it is possible for some of the bits to change at the same time though.
+ */
+#define SCHED_STATE_NO_LOCK_ENABLED                    BIT(0)
+static inline bool context_enabled(struct intel_context *ce)
+{
+       return (atomic_read(&ce->guc_sched_state_no_lock) &
+               SCHED_STATE_NO_LOCK_ENABLED);
+}
+
+static inline void set_context_enabled(struct intel_context *ce)
+{
+       atomic_or(SCHED_STATE_NO_LOCK_ENABLED, &ce->guc_sched_state_no_lock);
+}
+
+static inline void clr_context_enabled(struct intel_context *ce)
+{
+       atomic_and((u32)~SCHED_STATE_NO_LOCK_ENABLED,
+                  &ce->guc_sched_state_no_lock);
+}
+
 static inline struct i915_priolist *to_priolist(struct rb_node *rb)
 {
        return rb_entry(rb, struct i915_priolist, node);
 }
 
-static void guc_add_request(struct intel_guc *guc, struct i915_request *rq)
+static int guc_add_request(struct intel_guc *guc, struct i915_request *rq)
 {
-       /* Leaving stub as this function will be used in future patches */
-}
+       int err;
+       struct intel_context *ce = rq->context;
+       u32 action[3];
+       int len = 0;
+       bool enabled = context_enabled(ce);
 
-/*
- * When we're doing submissions using regular execlists backend, writing to
- * ELSP from CPU side is enough to make sure that writes to ringbuffer pages
- * pinned in mappable aperture portion of GGTT are visible to command streamer.
- * Writes done by GuC on our behalf are not guaranteeing such ordering,
- * therefore, to ensure the flush, we're issuing a POSTING READ.
- */
-static void flush_ggtt_writes(struct i915_vma *vma)
-{
-       if (i915_vma_is_map_and_fenceable(vma))
-               intel_uncore_posting_read_fw(vma->vm->gt->uncore,
-                                            GUC_STATUS);
-}
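+       /*
+        * The first submission on a context must enable GuC scheduling for it
+        * with a SCHED_CONTEXT_MODE_SET message; once enabled, later requests
+        * only need a SCHED_CONTEXT message to notify the GuC of new work.
+        */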
+       if (!enabled) {
+               action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_SET;
+               action[len++] = ce->guc_id;
+               action[len++] = GUC_CONTEXT_ENABLE;
+       } else {
+               action[len++] = INTEL_GUC_ACTION_SCHED_CONTEXT;
+               action[len++] = ce->guc_id;
+       }
 
-static void guc_submit(struct intel_engine_cs *engine,
-                      struct i915_request **out,
-                      struct i915_request **end)
-{
-       struct intel_guc *guc = &engine->gt->uc.guc;
+       err = intel_guc_send_nb(guc, action, len);
 
-       do {
-               struct i915_request *rq = *out++;
+       if (!enabled && !err)
+               set_context_enabled(ce);
 
-               flush_ggtt_writes(rq->ring->vma);
-               guc_add_request(guc, rq);
-       } while (out != end);
+       return err;
 }
 
@@ ... @@ static inline int rq_prio(const struct i915_request *rq)
        return rq->sched.attr.priority;
 }
 
-static struct i915_request *schedule_in(struct i915_request *rq, int idx)
+static int guc_dequeue_one_context(struct intel_guc *guc)
 {
-       trace_i915_request_in(rq, idx);
-
-       /*
-        * Currently we are not tracking the rq->context being inflight
-        * (ce->inflight = rq->engine). It is only used by the execlists
-        * backend at the moment, a similar counting strategy would be
-        * required if we generalise the inflight tracking.
-        */
-
-       __intel_gt_pm_get(rq->engine->gt);
-       return i915_request_get(rq);
-}
-
-static void schedule_out(struct i915_request *rq)
-{
-       trace_i915_request_out(rq);
-
-       intel_gt_pm_put_async(rq->engine->gt);
-       i915_request_put(rq);
-}
-
-static void __guc_dequeue(struct intel_engine_cs *engine)
-{
-       struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct i915_sched_engine * const sched_engine = engine->sched_engine;
-       struct i915_request **first = execlists->inflight;
-       struct i915_request ** const last_port = first + execlists->port_mask;
-       struct i915_request *last = first[0];
-       struct i915_request **port;
+       struct i915_sched_engine * const sched_engine = guc->sched_engine;
+       struct i915_request *last = NULL;
        bool submit = false;
        struct rb_node *rb;
+       int ret;
 
        lockdep_assert_held(&sched_engine->lock);
 
-       if (last) {
-               if (*++first)
-                       return;
-
-               last = NULL;
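+       /* Resubmit a request that previously stalled with -EBUSY first. */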
+       if (guc->stalled_request) {
+               submit = true;
+               last = guc->stalled_request;
+               goto resubmit;
        }
 
-       /*
-        * We write directly into the execlists->inflight queue and don't use
-        * the execlists->pending queue, as we don't have a distinct switch
-        * event.
-        */
-       port = first;
        while ((rb = rb_first_cached(&sched_engine->queue))) {
                struct i915_priolist *p = to_priolist(rb);
                struct i915_request *rq, *rn;
 
                priolist_for_each_request_consume(rq, rn, p) {
-                       if (last && rq->context != last->context) {
-                               if (port == last_port)
-                                       goto done;
-
-                               *port = schedule_in(last,
-                                                   port - execlists->inflight);
-                               port++;
-                       }
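+                       /* Submit requests from only one context per pass. */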
+                       if (last && rq->context != last->context)
+                               goto done;
 
                        list_del_init(&rq->sched.link);
+
                        __i915_request_submit(rq);
-                       submit = true;
+
+                       trace_i915_request_in(rq, 0);
                        last = rq;
+                       submit = true;
                }
 
                rb_erase_cached(&p->node, &sched_engine->queue);
                i915_priolist_free(p);
        }
 done:
-       sched_engine->queue_priority_hint =
-               rb ? to_priolist(rb)->priority : INT_MIN;
        if (submit) {
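+               /*
+                * Update the ring tail in the context image so the GuC picks
+                * up the newly submitted requests.
+                */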
-               *port = schedule_in(last, port - execlists->inflight);
-               *++port = NULL;
-               guc_submit(engine, first, port);
+               last->context->lrc_reg_state[CTX_RING_TAIL] =
+                       intel_ring_set_tail(last->ring, last->tail);
+resubmit:
+               /*
+                * We only check for -EBUSY here even though it is possible for
+                * -EDEADLK to be returned. -EDEADLK means the GuC has died and
+                * a full GT reset is needed. The hangcheck will eventually
+                * detect that the GuC has died and trigger this reset, so
+                * there is no need to handle -EDEADLK here.
+                */
+               ret = guc_add_request(guc, last);
+               if (ret == -EBUSY) {
+                       tasklet_schedule(&sched_engine->tasklet);
+                       guc->stalled_request = last;
+                       return false;
+               }
        }
-       execlists->active = execlists->inflight;
+
+       guc->stalled_request = NULL;
+       return submit;
 }
 
 static void guc_submission_tasklet(struct tasklet_struct *t)
 {
        struct i915_sched_engine *sched_engine =
                from_tasklet(sched_engine, t, tasklet);
-       struct intel_engine_cs * const engine = sched_engine->private_data;
-       struct intel_engine_execlists * const execlists = &engine->execlists;
-       struct i915_request **port, *rq;
        unsigned long flags;
+       bool loop;
 
-       spin_lock_irqsave(&engine->sched_engine->lock, flags);
-
-       for (port = execlists->inflight; (rq = *port); port++) {
-               if (!i915_request_completed(rq))
-                       break;
-
-               schedule_out(rq);
-       }
-       if (port != execlists->inflight) {
-               int idx = port - execlists->inflight;
-               int rem = ARRAY_SIZE(execlists->inflight) - idx;
-               memmove(execlists->inflight, port, rem * sizeof(*port));
-       }
+       spin_lock_irqsave(&sched_engine->lock, flags);
 
-       __guc_dequeue(engine);
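+       /* Dequeue and submit contexts until one stalls or the queue is empty. */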
+       do {
+               loop = guc_dequeue_one_context(sched_engine->private_data);
+       } while (loop);
 
-       i915_sched_engine_reset_on_empty(engine->sched_engine);
+       i915_sched_engine_reset_on_empty(sched_engine);
 
-       spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+       spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
 static void cs_irq_handler(struct intel_engine_cs *engine, u16 iir)
 {
-       if (iir & GT_RENDER_USER_INTERRUPT) {
+       if (iir & GT_RENDER_USER_INTERRUPT)
                intel_engine_signal_breadcrumbs(engine);
-               tasklet_hi_schedule(&engine->sched_engine->tasklet);
-       }
 }
 
@@ ... @@ static void guc_reset_prepare(struct intel_engine_cs *engine)
        struct rb_node *rb;
        unsigned long flags;
 
+       /* Can be called during boot if GuC fails to load */
+       if (!engine->gt)
+               return;
+
        ENGINE_TRACE(engine, "\n");
 
        /*
 
 void intel_guc_submission_fini(struct intel_guc *guc)
 {
-       if (guc->lrc_desc_pool)
-               guc_lrc_desc_pool_destroy(guc);
+       if (!guc->lrc_desc_pool)
+               return;
+
+       guc_lrc_desc_pool_destroy(guc);
+       i915_sched_engine_put(guc->sched_engine);
 }
 
@@ ... @@ static int guc_context_alloc(struct intel_context *ce)
        return 0;
 }
 
-static inline void queue_request(struct intel_engine_cs *engine,
+static inline void queue_request(struct i915_sched_engine *sched_engine,
                                 struct i915_request *rq,
                                 int prio)
 {
        GEM_BUG_ON(!list_empty(&rq->sched.link));
        list_add_tail(&rq->sched.link,
-                     i915_sched_lookup_priolist(engine->sched_engine, prio));
+                     i915_sched_lookup_priolist(sched_engine, prio));
        set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags);
 }
 
 static void guc_submit_request(struct i915_request *rq)
 {
-       struct intel_engine_cs *engine = rq->engine;
+       struct i915_sched_engine *sched_engine = rq->engine->sched_engine;
        unsigned long flags;
 
        /* Will be called from irq-context when using foreign fences. */
-       spin_lock_irqsave(&engine->sched_engine->lock, flags);
+       spin_lock_irqsave(&sched_engine->lock, flags);
 
-       queue_request(engine, rq, rq_prio(rq));
+       queue_request(sched_engine, rq, rq_prio(rq));
 
-       GEM_BUG_ON(i915_sched_engine_is_empty(engine->sched_engine));
+       GEM_BUG_ON(i915_sched_engine_is_empty(sched_engine));
        GEM_BUG_ON(list_empty(&rq->sched.link));
 
-       tasklet_hi_schedule(&engine->sched_engine->tasklet);
+       tasklet_hi_schedule(&sched_engine->tasklet);
 
-       spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
+       spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
 
@@ ... @@ static void sanitize_hwsp(struct intel_engine_cs *engine)
 {
        engine->sanitize = NULL; /* no longer in control, nothing to sanitize */
 
-       tasklet_kill(&engine->sched_engine->tasklet);
-
        intel_engine_cleanup_common(engine);
        lrc_fini_wa_ctx(engine);
 }
 int intel_guc_submission_setup(struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = engine->i915;
+       struct intel_guc *guc = &engine->gt->uc.guc;
 
        /*
         * The setup relies on several assumptions (e.g. irqs always enabled)
         * that are only valid on gen11+
         */
        GEM_BUG_ON(GRAPHICS_VER(i915) < 11);
 
-       tasklet_setup(&engine->sched_engine->tasklet, guc_submission_tasklet);
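+       /*
+        * The GuC submission backend shares a single sched_engine across all
+        * engines: create it when the first engine is set up and have each
+        * engine take a reference on it.
+        */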
+       if (!guc->sched_engine) {
+               guc->sched_engine = i915_sched_engine_create(ENGINE_VIRTUAL);
+               if (!guc->sched_engine)
+                       return -ENOMEM;
+
+               guc->sched_engine->schedule = i915_schedule;
+               guc->sched_engine->private_data = guc;
+               tasklet_setup(&guc->sched_engine->tasklet,
+                             guc_submission_tasklet);
+       }
+       i915_sched_engine_put(engine->sched_engine);
+       engine->sched_engine = i915_sched_engine_get(guc->sched_engine);
 
        guc_default_vfuncs(engine);
        guc_default_irqs(engine);