#include "i915_drv.h"
 #include "intel_renderstate.h"
 
-struct render_state {
+/*
+ * Per-engine cached render state batch.  Built once at engine init,
+ * stored in engine->render_state and re-emitted into requests by
+ * i915_gem_render_state_emit().
+ */
+struct intel_render_state {
        const struct intel_renderstate_rodata *rodata;
        struct i915_vma *vma;
-       u32 aux_batch_size;
-       u32 aux_batch_offset;
+       u32 batch_offset; /* GGTT address of the main batch (vma->node.start) */
+       u32 batch_size; /* main batch length, in bytes */
+       u32 aux_offset; /* GGTT address of the aux batch, placed after the main one */
+       u32 aux_size; /* aux batch length, in bytes (8-byte aligned) */
 };
 
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(const struct drm_i915_gem_request *req)
+render_state_get_rodata(const struct intel_engine_cs *engine)
 {
-       switch (INTEL_GEN(req->i915)) {
+       switch (INTEL_GEN(engine->i915)) {
        case 6:
                return &gen6_null_state;
        case 7:
  */
+/*
+ * Append one dword to the kmapped batch page; if the single page would
+ * overflow, bail out through the caller's local "err" label (which is
+ * responsible for unmapping and setting the error code).
+ */
 #define OUT_BATCH(batch, i, val)                               \
        do {                                                    \
-               if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {  \
-                       ret = -ENOSPC;                          \
-                       goto err_out;                           \
-               }                                               \
+               if ((i) >= PAGE_SIZE / sizeof(u32))             \
+                       goto err;                               \
                (batch)[(i)++] = (val);                         \
        } while(0)
 
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+                             struct drm_i915_private *i915)
 {
-       struct drm_i915_private *dev_priv = to_i915(so->vma->vm->dev);
        const struct intel_renderstate_rodata *rodata = so->rodata;
-       const bool has_64bit_reloc = INTEL_GEN(dev_priv) >= 8;
+       const bool has_64bit_reloc = INTEL_GEN(i915) >= 8;
+       struct drm_i915_gem_object *obj = so->vma->obj;
        unsigned int i = 0, reloc_index = 0;
-       struct page *page;
+       unsigned int needs_clflush;
        u32 *d;
        int ret;
 
-       ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true);
+       ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
        if (ret)
                return ret;
 
-       page = i915_gem_object_get_dirty_page(so->vma->obj, 0);
-       d = kmap(page);
+       d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
 
        while (i < rodata->batch_items) {
                u32 s = rodata->batch[i];
                        s = lower_32_bits(r);
                        if (has_64bit_reloc) {
                                if (i + 1 >= rodata->batch_items ||
-                                   rodata->batch[i + 1] != 0) {
-                                       ret = -EINVAL;
-                                       goto err_out;
-                               }
+                                   rodata->batch[i + 1] != 0)
+                                       goto err;
 
                                d[i++] = s;
                                s = upper_32_bits(r);
                d[i++] = s;
        }
 
+       if (rodata->reloc[reloc_index] != -1) {
+               DRM_ERROR("only %d relocs resolved\n", reloc_index);
+               goto err;
+       }
+
+       so->batch_offset = so->vma->node.start;
+       so->batch_size = rodata->batch_items * sizeof(u32);
+
        while (i % CACHELINE_DWORDS)
                OUT_BATCH(d, i, MI_NOOP);
 
-       so->aux_batch_offset = i * sizeof(u32);
+       so->aux_offset = i * sizeof(u32);
 
-       if (HAS_POOLED_EU(dev_priv)) {
+       if (HAS_POOLED_EU(i915)) {
                /*
                 * We always program 3x6 pool config but depending upon which
                 * subslice is disabled HW drops down to appropriate config
        }
 
        OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
-       so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+       so->aux_size = i * sizeof(u32) - so->aux_offset;
+       so->aux_offset += so->batch_offset;
        /*
         * Since we are sending length, we need to strictly conform to
         * all requirements. For Gen2 this must be a multiple of 8.
         */
-       so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
-       kunmap(page);
-
-       ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false);
-       if (ret)
-               return ret;
-
-       if (rodata->reloc[reloc_index] != -1) {
-               DRM_ERROR("only %d relocs resolved\n", reloc_index);
-               return -EINVAL;
-       }
+       so->aux_size = ALIGN(so->aux_size, 8);
 
-       return 0;
+       if (needs_clflush)
+               drm_clflush_virt_range(d, i * sizeof(u32));
+       kunmap_atomic(d);
 
-err_out:
-       kunmap(page);
+       ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+       i915_gem_obj_finish_shmem_access(obj);
        return ret;
+
+err:
+       kunmap_atomic(d);
+       ret = -EINVAL;
+       goto out;
 }
 
 #undef OUT_BATCH
 
+/**
+ * i915_gem_render_state_init - allocate the engine's cached render state
+ * @engine: engine to initialise
+ *
+ * Only the render engine (RCS) carries a render state; other engines,
+ * and generations with no rodata table, return 0 with nothing allocated.
+ * This allocates the container, backing object and GGTT vma, but does
+ * NOT write the batch contents - that is deferred to the first
+ * i915_gem_render_state_emit(), once the vma is pinned.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
 {
-       struct render_state so;
+       struct intel_render_state *so;
+       const struct intel_renderstate_rodata *rodata;
        struct drm_i915_gem_object *obj;
        int ret;
 
-       if (WARN_ON(req->engine->id != RCS))
-               return -ENOENT;
+       if (engine->id != RCS)
+               return 0;
 
-       so.rodata = render_state_get_rodata(req);
-       if (!so.rodata)
+       rodata = render_state_get_rodata(engine);
+       if (!rodata)
                return 0;
 
-       if (so.rodata->batch_items * 4 > 4096)
+       /* render_state_setup() packs everything into a single page */
+       if (rodata->batch_items * 4 > 4096)
                return -EINVAL;
 
-       obj = i915_gem_object_create_internal(req->i915, 4096);
-       if (IS_ERR(obj))
-               return PTR_ERR(obj);
+       so = kmalloc(sizeof(*so), GFP_KERNEL);
+       if (!so)
+               return -ENOMEM;
 
-       so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL);
-       if (IS_ERR(so.vma)) {
-               ret = PTR_ERR(so.vma);
-               goto err_obj;
+       obj = i915_gem_object_create_internal(engine->i915, 4096);
+       if (IS_ERR(obj)) {
+               ret = PTR_ERR(obj);
+               goto err_free;
        }
 
-       ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL);
-       if (ret)
+       so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+       if (IS_ERR(so->vma)) {
+               ret = PTR_ERR(so->vma);
                goto err_obj;
+       }
+
+       so->rodata = rodata;
+       engine->render_state = so;
+       return 0;
 
-       ret = render_state_setup(&so);
+err_obj:
+       i915_gem_object_put(obj);
+err_free:
+       kfree(so);
+       return ret;
+}
+
+/**
+ * i915_gem_render_state_emit - emit the cached render state into a request
+ * @req: request to dispatch the render state batches from
+ *
+ * Pins the cached render state vma and, if the vma now sits at a
+ * different GGTT address than the recorded batch (e.g. after the
+ * shrinker reaped its pages), rewrites the batch contents via
+ * render_state_setup() before dispatching the main batch and, when
+ * present, the auxiliary batch.  The vma is marked active so the
+ * object survives until the request retires.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
+{
+       struct intel_render_state *so;
+       int ret;
+
+       so = req->engine->render_state;
+       if (!so)
+               return 0;
+
+       /* Recreate the page after shrinking */
+       /* NOTE(review): batch_offset is never written by _init(), so this
+        * !pages check presumably also forces the very first setup (the
+        * internal object has no pages until first pinned) - confirm.
+        */
+       if (!so->vma->obj->pages)
+               so->batch_offset = -1;
+
+       ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
        if (ret)
-               goto err_unpin;
+               return ret;
 
-       ret = req->engine->emit_bb_start(req, so.vma->node.start,
-                                        so.rodata->batch_items * 4,
+       if (so->vma->node.start != so->batch_offset) {
+               ret = render_state_setup(so, req->i915);
+               if (ret)
+                       goto err_unpin;
+       }
+
+       ret = req->engine->emit_bb_start(req,
+                                        so->batch_offset, so->batch_size,
                                         I915_DISPATCH_SECURE);
        if (ret)
                goto err_unpin;
 
-       if (so.aux_batch_size > 8) {
+       /* aux_size <= 8 means only padding/BB_END, nothing to dispatch */
+       if (so->aux_size > 8) {
                ret = req->engine->emit_bb_start(req,
-                                                (so.vma->node.start +
-                                                 so.aux_batch_offset),
-                                                so.aux_batch_size,
+                                                so->aux_offset, so->aux_size,
                                                 I915_DISPATCH_SECURE);
                if (ret)
                        goto err_unpin;
        }
 
-       i915_vma_move_to_active(so.vma, req, 0);
+       i915_vma_move_to_active(so->vma, req, 0);
 err_unpin:
-       i915_vma_unpin(so.vma);
-       i915_vma_close(so.vma);
-err_obj:
-       __i915_gem_object_release_unless_active(obj);
+       i915_vma_unpin(so->vma);
        return ret;
 }
+
+/**
+ * i915_gem_render_state_fini - release the engine's cached render state
+ * @engine: engine being torn down
+ *
+ * Detaches engine->render_state (fetch_and_zero makes a repeated call a
+ * no-op), closes the vma, drops the backing object unless it is still
+ * active on the GPU, and frees the container.
+ */
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+       struct intel_render_state *so;
+       struct drm_i915_gem_object *obj;
+
+       so = fetch_and_zero(&engine->render_state);
+       if (!so)
+               return;
+
+       obj = so->vma->obj;
+
+       /* Close the vma first; obj may outlive it if a request still uses it */
+       i915_vma_close(so->vma);
+       __i915_gem_object_release_unless_active(obj);
+
+       kfree(so);
+}