ring_id_to_context_switch_event(execlist->ring_id));
 }
 
-int emulate_execlist_ctx_schedule_out(
+static int emulate_execlist_ctx_schedule_out(
                struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format *ctx)
 {
        return &execlist->slot[status.execlist_write_pointer];
 }
 
-int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
+static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format ctx[2])
 {
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        return 0;
 }
 
+static void free_workload(struct intel_vgpu_workload *workload)
+{
+       intel_vgpu_unpin_mm(workload->shadow_mm);
+       intel_gvt_mm_unreference(workload->shadow_mm);
+       kmem_cache_free(workload->vgpu->workloads, workload);
+}
+
+#define get_desc_from_elsp_dwords(ed, i) \
+       ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
+
+static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
+{
+       struct intel_vgpu *vgpu = workload->vgpu;
+       struct execlist_ctx_descriptor_format ctx[2];
+       int ring_id = workload->ring_id;
+
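+       /* make the shadow page tables ready before the workload is dispatched */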
+       intel_vgpu_pin_mm(workload->shadow_mm);
+       intel_vgpu_sync_oos_pages(workload->vgpu);
+       intel_vgpu_flush_post_shadow(workload->vgpu);
+       if (!workload->emulate_schedule_in)
+               return 0;
+
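+       /* replay the schedule-in using the ELSP dwords captured at submission */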
+       ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
+       ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
+
+       return emulate_execlist_schedule_in(&vgpu->execlist[ring_id], ctx);
+}
+
+static int complete_execlist_workload(struct intel_vgpu_workload *workload)
+{
+       struct intel_vgpu *vgpu = workload->vgpu;
+       struct intel_vgpu_execlist *execlist =
+               &vgpu->execlist[workload->ring_id];
+       struct intel_vgpu_workload *next_workload;
+       struct list_head *next = workload_q_head(vgpu, workload->ring_id)->next;
+       bool lite_restore = false;
+       int ret;
+
+       gvt_dbg_el("complete workload %p status %d\n", workload,
+                       workload->status);
+
+       if (workload->status)
+               goto out;
+
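+       /*
+        * if the next queued workload targets the same guest context, this is
+        * a lite restore and no schedule-out should be emulated for it
+        */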
+       if (!list_empty(workload_q_head(vgpu, workload->ring_id))) {
+               struct execlist_ctx_descriptor_format *this_desc, *next_desc;
+
+               next_workload = container_of(next,
+                               struct intel_vgpu_workload, list);
+               this_desc = &workload->ctx_desc;
+               next_desc = &next_workload->ctx_desc;
+
+               lite_restore = same_context(this_desc, next_desc);
+       }
+
+       if (lite_restore) {
+               gvt_dbg_el("next context == current - no schedule-out\n");
+               free_workload(workload);
+               return 0;
+       }
+
+       ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
+       if (ret)
+               goto err;
+out:
+       free_workload(workload);
+       return 0;
+err:
+       free_workload(workload);
+       return ret;
+}
+
+#define RING_CTX_OFF(x) \
+       offsetof(struct execlist_ring_context, x)
+
+static void read_guest_pdps(struct intel_vgpu *vgpu,
+               u64 ring_context_gpa, u32 pdp[8])
+{
+       u64 gpa;
+       int i;
+
+       gpa = ring_context_gpa + RING_CTX_OFF(pdp3_UDW.val);
+
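+       /*
+        * ring context fields are {reg, val} pairs, hence the stride of 8;
+        * the guest PDPs are laid out pdp3..pdp0, so fill pdp[] in reverse
+        */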
+       for (i = 0; i < 8; i++)
+               intel_gvt_hypervisor_read_gpa(vgpu,
+                               gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static int prepare_mm(struct intel_vgpu_workload *workload)
+{
+       struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
+       struct intel_vgpu_mm *mm;
+       int page_table_level;
+       u32 pdp[8];
+
+       if (desc->addressing_mode == 1) { /* legacy 32-bit */
+               page_table_level = 3;
+       } else if (desc->addressing_mode == 3) { /* legacy 64-bit */
+               page_table_level = 4;
+       } else {
+               gvt_err("Advanced Context mode (SVM) is not supported!\n");
+               return -EINVAL;
+       }
+
+       read_guest_pdps(workload->vgpu, workload->ring_context_gpa, pdp);
+
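+       /* reuse an existing shadow mm for these page directories, or create one */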
+       mm = intel_vgpu_find_ppgtt_mm(workload->vgpu, page_table_level, pdp);
+       if (mm) {
+               intel_gvt_mm_reference(mm);
+       } else {
+
+               mm = intel_vgpu_create_mm(workload->vgpu, INTEL_GVT_MM_PPGTT,
+                               pdp, page_table_level, 0);
+               if (IS_ERR(mm)) {
+                       gvt_err("fail to create mm object.\n");
+                       return PTR_ERR(mm);
+               }
+       }
+       workload->shadow_mm = mm;
+       return 0;
+}
+
+#define get_last_workload(q) \
+       (list_empty(q) ? NULL : container_of(q->prev, \
+       struct intel_vgpu_workload, list))
+
+static int submit_context(struct intel_vgpu *vgpu, int ring_id,
+               struct execlist_ctx_descriptor_format *desc,
+               bool emulate_schedule_in)
+{
+       struct list_head *q = workload_q_head(vgpu, ring_id);
+       struct intel_vgpu_workload *last_workload = get_last_workload(q);
+       struct intel_vgpu_workload *workload = NULL;
+       u64 ring_context_gpa;
+       u32 head, tail, start, ctl, ctx_ctl;
+       int ret;
+
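+       /* the guest ring context lives in the page following the LRCA page */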
+       ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+                       (u32)((desc->lrca + 1) << GTT_PAGE_SHIFT));
+       if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
+               gvt_err("invalid guest context LRCA: %x\n", desc->lrca);
+               return -EINVAL;
+       }
+
+       intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+                       RING_CTX_OFF(ring_header.val), &head, 4);
+
+       intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+                       RING_CTX_OFF(ring_tail.val), &tail, 4);
+
+       head &= RB_HEAD_OFF_MASK;
+       tail &= RB_TAIL_OFF_MASK;
+
+       if (last_workload && same_context(&last_workload->ctx_desc, desc)) {
+               gvt_dbg_el("ring id %d cur workload == last\n", ring_id);
+               gvt_dbg_el("ctx head %x real head %lx\n", head,
+                               last_workload->rb_tail);
+               /*
+                * cannot use guest context head pointer here,
+                * as it might not be updated at this time
+                */
+               head = last_workload->rb_tail;
+       }
+
+       gvt_dbg_el("ring id %d begin a new workload\n", ring_id);
+
+       workload = kmem_cache_zalloc(vgpu->workloads, GFP_KERNEL);
+       if (!workload)
+               return -ENOMEM;
+
+       /* record some ring buffer register values for scan and shadow */
+       intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+                       RING_CTX_OFF(rb_start.val), &start, 4);
+       intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+                       RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
+       intel_gvt_hypervisor_read_gpa(vgpu, ring_context_gpa +
+                       RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
+
+       INIT_LIST_HEAD(&workload->list);
+
+       init_waitqueue_head(&workload->shadow_ctx_status_wq);
+       atomic_set(&workload->shadow_ctx_active, 0);
+
+       workload->vgpu = vgpu;
+       workload->ring_id = ring_id;
+       workload->ctx_desc = *desc;
+       workload->ring_context_gpa = ring_context_gpa;
+       workload->rb_head = head;
+       workload->rb_tail = tail;
+       workload->rb_start = start;
+       workload->rb_ctl = ctl;
+       workload->prepare = prepare_execlist_workload;
+       workload->complete = complete_execlist_workload;
+       workload->status = -EINPROGRESS;
+       workload->emulate_schedule_in = emulate_schedule_in;
+
+       if (emulate_schedule_in)
+               memcpy(&workload->elsp_dwords,
+                               &vgpu->execlist[ring_id].elsp_dwords,
+                               sizeof(workload->elsp_dwords));
+
+       gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
+                       workload, ring_id, head, tail, start, ctl);
+
+       gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
+                       emulate_schedule_in);
+
+       ret = prepare_mm(workload);
+       if (ret) {
+               kmem_cache_free(vgpu->workloads, workload);
+               return ret;
+       }
+
+       queue_workload(workload);
+       return 0;
+}
+
+int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
+{
+       struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
+       struct execlist_ctx_descriptor_format *desc[2], valid_desc[2];
+       unsigned long valid_desc_bitmap = 0;
+       bool emulate_schedule_in = true;
+       int ret;
+       int i;
+
+       memset(valid_desc, 0, sizeof(valid_desc));
+
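+       /* an ELSP submission carries up to two descriptors, two dwords each */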
+       desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
+       desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
+
+       for (i = 0; i < 2; i++) {
+               if (!desc[i]->valid)
+                       continue;
+
+               if (!desc[i]->privilege_access) {
+                       gvt_err("vgpu%d: unexpected GGTT elsp submission\n",
+                                       vgpu->id);
+                       return -EINVAL;
+               }
+
+               /* TODO: add another guest context checks here. */
+               set_bit(i, &valid_desc_bitmap);
+               valid_desc[i] = *desc[i];
+       }
+
+       if (!valid_desc_bitmap) {
+               gvt_err("vgpu%d: no valid desc in an elsp submission\n",
+                               vgpu->id);
+               return -EINVAL;
+       }
+
+       if (!test_bit(0, (void *)&valid_desc_bitmap) &&
+                       test_bit(1, (void *)&valid_desc_bitmap)) {
+               gvt_err("vgpu%d: weird elsp submission, desc 0 is not valid\n",
+                               vgpu->id);
+               return -EINVAL;
+       }
+
+       /* submit workload */
+       for_each_set_bit(i, (void *)&valid_desc_bitmap, 2) {
+               ret = submit_context(vgpu, ring_id, &valid_desc[i],
+                               emulate_schedule_in);
+               if (ret) {
+                       gvt_err("vgpu%d: fail to schedule workload\n",
+                                       vgpu->id);
+                       return ret;
+               }
+               emulate_schedule_in = false;
+       }
+       return 0;
+}
+
 static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 {
        struct intel_vgpu_execlist *execlist = &vgpu->execlist[ring_id];
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 
+void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu)
+{
+       kmem_cache_destroy(vgpu->workloads);
+}
+
 int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 {
        int i;
 
        /* each ring has a virtual execlist engine */
-       for (i = 0; i < I915_NUM_ENGINES; i++)
+       for (i = 0; i < I915_NUM_ENGINES; i++) {
                init_vgpu_execlist(vgpu, i);
+               INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
+       }
+
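+       /* per-vGPU slab cache for workload objects */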
+       vgpu->workloads = kmem_cache_create("gvt-g vgpu workload",
+                       sizeof(struct intel_vgpu_workload), 0,
+                       SLAB_HWCACHE_ALIGN,
+                       NULL);
+
+       if (!vgpu->workloads)
+               return -ENOMEM;
 
        return 0;
 }
 
        return 0;
 }
 
+static int render_mmio_to_ring_id(struct intel_gvt *gvt, unsigned int reg)
+{
+       int i;
+
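+       /* mask off the low 12 bits to recover the engine's 4K-aligned mmio base */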
+       reg &= ~GENMASK(11, 0);
+       for (i = 0; i < I915_NUM_ENGINES; i++) {
+               if (gvt->dev_priv->engine[i].mmio_base == reg)
+                       return i;
+       }
+       return -1;
+}
+
 #define offset_to_fence_num(offset) \
        ((offset - i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) >> 3)
 
        return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
 }
 
+static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+               void *p_data, unsigned int bytes)
+{
+       int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset);
+       struct intel_vgpu_execlist *execlist;
+       u32 data = *(u32 *)p_data;
+       int ret;
+
+       if (WARN_ON(ring_id < 0))
+               return -EINVAL;
+
+       execlist = &vgpu->execlist[ring_id];
+
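+       /* an ELSP submission is four dword writes; the fourth one triggers it */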
+       execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data;
+       if (execlist->elsp_dwords.index == 3) {
+               ret = intel_vgpu_submit_execlist(vgpu, ring_id);
+               if (ret)
+                       gvt_err("vgpu%d: fail to submit execlist\n", vgpu->id);
+       }
+
+       ++execlist->elsp_dwords.index;
+       execlist->elsp_dwords.index &= 0x3;
+       return 0;
+}
+
 #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
        ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \
                f, s, am, rm, d, r, w); \
        MMIO_RING_D(RING_ACTHD_UDW, D_BDW_PLUS);
 
 #define RING_REG(base) (base + 0x230)
-       MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, NULL);
-       MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, NULL);
+       MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write);
+       MMIO_DH(RING_REG(GEN8_BSD2_RING_BASE), D_BDW_PLUS, NULL, elsp_mmio_write);
 #undef RING_REG
 
 #define RING_REG(base) (base + 0x234)
 
--- /dev/null
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_SCHEDULER_H_
+#define _GVT_SCHEDULER_H_
+
+struct intel_gvt_workload_scheduler {
+       struct list_head workload_q_head[I915_NUM_ENGINES];
+};
+
+struct intel_vgpu_workload {
+       struct intel_vgpu *vgpu;
+       int ring_id;
+       struct drm_i915_gem_request *req;
+       /* has this workload been dispatched to i915? */
+       bool dispatched;
+       int status;
+
+       struct intel_vgpu_mm *shadow_mm;
+
+       /* different submission model may need different handler */
+       int (*prepare)(struct intel_vgpu_workload *);
+       int (*complete)(struct intel_vgpu_workload *);
+       struct list_head list;
+
+       /* execlist context information */
+       struct execlist_ctx_descriptor_format ctx_desc;
+       struct execlist_ring_context *ring_context;
+       unsigned long rb_head, rb_tail, rb_ctl, rb_start;
+       struct intel_vgpu_elsp_dwords elsp_dwords;
+       bool emulate_schedule_in;
+       atomic_t shadow_ctx_active;
+       wait_queue_head_t shadow_ctx_status_wq;
+       u64 ring_context_gpa;
+};
+
+#define workload_q_head(vgpu, ring_id) \
+       (&(vgpu->workload_q_head[ring_id]))
+
+#define queue_workload(workload) \
+       list_add_tail(&workload->list, \
+       workload_q_head(workload->vgpu, workload->ring_id))
+
+#endif