cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
        cache->node.flags = 0;
        cache->rq = NULL;
-       cache->rq_size = 0;
+       cache->target = NULL;
 }
 
 static inline void *unmask_page(unsigned long p)
 
                ce = intel_context_create(engine);
                if (IS_ERR(ce)) {
-                       err = PTR_ERR(rq);
+                       err = PTR_ERR(ce);
                        goto err_unpin;
                }
 
        return err;
 }
 
+/*
+ * Report whether @engine may be used to emit the relocation commands:
+ * every engine is accepted except one of VIDEO_DECODE_CLASS on gen6.
+ */
+static bool reloc_can_use_engine(const struct intel_engine_cs *engine)
+{
+       if (engine->class != VIDEO_DECODE_CLASS)
+               return true;
+
+       return !IS_GEN(engine->i915, 6);
+}
+
 static u32 *reloc_gpu(struct i915_execbuffer *eb,
                      struct i915_vma *vma,
                      unsigned int len)
        if (unlikely(!cache->rq)) {
                struct intel_engine_cs *engine = eb->engine;
 
-               if (!intel_engine_can_store_dword(engine)) {
+               if (!reloc_can_use_engine(engine)) {
                        engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0];
-                       if (!engine || !intel_engine_can_store_dword(engine))
+                       if (!engine)
                                return ERR_PTR(-ENODEV);
                }
 
        return !dma_resv_test_signaled_rcu(vma->resv, true);
 }
 
-static u64
-relocate_entry(struct i915_vma *vma,
-              const struct drm_i915_gem_relocation_entry *reloc,
-              struct i915_execbuffer *eb,
-              const struct i915_vma *target)
+/*
+ * Return the physical address of the byte at @offset inside @vma's object:
+ * PFN_PHYS() of the backing page plus the offset within that page.
+ */
+static unsigned long vma_phys_addr(struct i915_vma *vma, u32 offset)
 {
-       u64 offset = reloc->offset;
-       u64 target_offset = relocation_target(reloc, target);
-       bool wide = eb->reloc_cache.use_64bit_reloc;
-       void *vaddr;
+       struct page *page;
+       unsigned long addr;
 
-       if (!eb->reloc_cache.vaddr && use_reloc_gpu(vma)) {
-               const unsigned int gen = eb->reloc_cache.gen;
-               unsigned int len;
-               u32 *batch;
-               u64 addr;
+       /* The page is looked up through vma->obj, so both must share pages */
+       GEM_BUG_ON(vma->pages != vma->obj->mm.pages);
 
-               if (wide)
-                       len = offset & 7 ? 8 : 5;
-               else if (gen >= 4)
-                       len = 4;
-               else
-                       len = 3;
+       page = i915_gem_object_get_page(vma->obj, offset >> PAGE_SHIFT);
+       addr = PFN_PHYS(page_to_pfn(page));
+       GEM_BUG_ON(overflows_type(addr, u32)); /* expected dma32 */
 
-               batch = reloc_gpu(eb, vma, len);
-               if (IS_ERR(batch))
-                       goto repeat;
+       return addr + offset_in_page(offset);
+}
+
+/*
+ * Write into the buffer returned by reloc_gpu() the command(s) that store
+ * @target_addr at @offset inside @vma.
+ *
+ * On gen8+ both halves of @target_addr are stored: one 5-dword command when
+ * @offset is 8-byte aligned, otherwise two 4-dword commands, one per half.
+ * Older gens store a single dword using a 4-dword (gen4+) or 3-dword
+ * command.
+ *
+ * Returns true once the commands have been written, false if reloc_gpu()
+ * returned an error pointer.
+ */
+static bool __reloc_entry_gpu(struct i915_execbuffer *eb,
+                             struct i915_vma *vma,
+                             u64 offset,
+                             u64 target_addr)
+{
+       const unsigned int gen = eb->reloc_cache.gen;
+       unsigned int len;
+       u32 *batch;
+       u64 addr;
+
+       /* Number of dwords to reserve; must match what is emitted below */
+       if (gen >= 8)
+               len = offset & 7 ? 8 : 5;
+       else if (gen >= 4)
+               len = 4;
+       else
+               len = 3;
+
+       batch = reloc_gpu(eb, vma, len);
+       if (IS_ERR(batch))
+               return false;
+
+       addr = gen8_canonical_addr(vma->node.start + offset);
+       if (gen >= 8) {
+               if (offset & 7) {
+                       *batch++ = MI_STORE_DWORD_IMM_GEN4;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = lower_32_bits(target_addr);
+
+                       addr = gen8_canonical_addr(addr + 4);
 
-               addr = gen8_canonical_addr(vma->node.start + offset);
-               if (wide) {
-                       if (offset & 7) {
-                               *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = lower_32_bits(target_offset);
-
-                               addr = gen8_canonical_addr(addr + 4);
-
-                               *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = upper_32_bits(target_offset);
-                       } else {
-                               *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
-                               *batch++ = lower_32_bits(addr);
-                               *batch++ = upper_32_bits(addr);
-                               *batch++ = lower_32_bits(target_offset);
-                               *batch++ = upper_32_bits(target_offset);
-                       }
-               } else if (gen >= 6) {
                        *batch++ = MI_STORE_DWORD_IMM_GEN4;
-                       *batch++ = 0;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
-               } else if (gen >= 4) {
-                       *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
-                       *batch++ = 0;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = upper_32_bits(target_addr);
                } else {
-                       *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-                       *batch++ = addr;
-                       *batch++ = target_offset;
+                       *batch++ = (MI_STORE_DWORD_IMM_GEN4 | (1 << 21)) + 1;
+                       *batch++ = lower_32_bits(addr);
+                       *batch++ = upper_32_bits(addr);
+                       *batch++ = lower_32_bits(target_addr);
+                       *batch++ = upper_32_bits(target_addr);
                }
-
-               goto out;
+       } else if (gen >= 6) {
+               *batch++ = MI_STORE_DWORD_IMM_GEN4;
+               *batch++ = 0;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else if (IS_I965G(eb->i915)) {
+               /* Destination is vma_phys_addr(), not addr, on this platform */
+               *batch++ = MI_STORE_DWORD_IMM_GEN4;
+               *batch++ = 0;
+               *batch++ = vma_phys_addr(vma, offset);
+               *batch++ = target_addr;
+       } else if (gen >= 4) {
+               *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+               *batch++ = 0;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else if (gen >= 3 &&
+                  !(IS_I915G(eb->i915) || IS_I915GM(eb->i915))) {
+               *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+               *batch++ = addr;
+               *batch++ = target_addr;
+       } else {
+               /* Remaining platforms: destination is vma_phys_addr() */
+               *batch++ = MI_STORE_DWORD_IMM;
+               *batch++ = vma_phys_addr(vma, offset);
+               *batch++ = target_addr;
        }
 
+       return true;
+}
+
+/*
+ * Try to perform the relocation with the GPU.
+ *
+ * Returns false without emitting anything when the reloc cache already
+ * holds a vaddr or use_reloc_gpu() rejects @vma; otherwise returns the
+ * result of __reloc_entry_gpu().
+ */
+static bool reloc_entry_gpu(struct i915_execbuffer *eb,
+                           struct i915_vma *vma,
+                           u64 offset,
+                           u64 target_addr)
+{
+       if (eb->reloc_cache.vaddr || !use_reloc_gpu(vma))
+               return false;
+
+       return __reloc_entry_gpu(eb, vma, offset, target_addr);
+}
+
+/*
+ * Write the relocation value for @reloc into @vma.
+ *
+ * reloc_entry_gpu() is tried first. If it returns false, the value is
+ * written through the mapping returned by reloc_vaddr(), one u32 per pass
+ * (two passes when reloc_cache.use_64bit_reloc is set).
+ *
+ * Returns target->node.start | UPDATE, or the PTR_ERR() of a failed
+ * reloc_vaddr().
+ */
+static u64
+relocate_entry(struct i915_vma *vma,
+              const struct drm_i915_gem_relocation_entry *reloc,
+              struct i915_execbuffer *eb,
+              const struct i915_vma *target)
+{
+       u64 target_addr = relocation_target(reloc, target);
+       u64 offset = reloc->offset;
+
+       if (!reloc_entry_gpu(eb, vma, offset, target_addr)) {
+               bool wide = eb->reloc_cache.use_64bit_reloc;
+               void *vaddr;
+
 repeat:
-       vaddr = reloc_vaddr(vma->obj, &eb->reloc_cache, offset >> PAGE_SHIFT);
-       if (IS_ERR(vaddr))
-               return PTR_ERR(vaddr);
+               vaddr = reloc_vaddr(vma->obj,
+                                   &eb->reloc_cache,
+                                   offset >> PAGE_SHIFT);
+               if (IS_ERR(vaddr))
+                       return PTR_ERR(vaddr);
 
-       clflush_write32(vaddr + offset_in_page(offset),
-                       lower_32_bits(target_offset),
-                       eb->reloc_cache.vaddr);
+               GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+               clflush_write32(vaddr + offset_in_page(offset),
+                               lower_32_bits(target_addr),
+                               eb->reloc_cache.vaddr);
 
-       if (wide) {
-               offset += sizeof(u32);
-               target_offset >>= 32;
-               wide = false;
-               goto repeat;
+               /* Second pass: upper 32 bits go into the following dword */
+               if (wide) {
+                       offset += sizeof(u32);
+                       target_addr >>= 32;
+                       wide = false;
+                       goto repeat;
+               }
        }
 
-out:
        return target->node.start | UPDATE;
 }
 
        kvfree(exec2_list);
        return err;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_execbuffer.c"
+#endif
 
        i915_gem_object_flush_map(t->scratch.vma->obj);
 }
 
-static void hexdump(const void *buf, size_t len)
-{
-       const size_t rowsize = 8 * sizeof(u32);
-       const void *prev = NULL;
-       bool skip = false;
-       size_t pos;
-
-       for (pos = 0; pos < len; pos += rowsize) {
-               char line[128];
-
-               if (prev && !memcmp(prev, buf + pos, rowsize)) {
-                       if (!skip) {
-                               pr_info("*\n");
-                               skip = true;
-                       }
-                       continue;
-               }
-
-               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
-                                               rowsize, sizeof(u32),
-                                               line, sizeof(line),
-                                               false) >= sizeof(line));
-               pr_info("[%04zx] %s\n", pos, line);
-
-               prev = buf + pos;
-               skip = false;
-       }
-}
-
 static u64 swizzle_bit(unsigned int bit, u64 offset)
 {
        return (offset & BIT_ULL(bit)) >> (bit - 6);
                pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
                       repr_tiling(buf->tiling),
                       x, y, buf->start_val);
-               hexdump(vaddr, 4096);
+               igt_hexdump(vaddr, 4096);
        }
 
        i915_gem_object_unpin_map(buf->vma->obj);
 
--- /dev/null
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_engine_pm.h"
+#include "selftests/igt_flush_test.h"
+
+/*
+ * Fetch the 64-bit value that starts at dword index @x of @map and return
+ * it with @mask applied.
+ */
+static u64 read_reloc(const u32 *map, int x, const u64 mask)
+{
+       u64 value;
+
+       /* Copy instead of casting: map + x is a u32 pointer, not a u64 one */
+       memcpy(&value, map + x, sizeof(value));
+
+       return value & mask;
+}
+
+/*
+ * Emit three relocations into @obj with __reloc_entry_gpu() and check,
+ * through the object's CPU mapping, that each value landed.
+ *
+ * The value i is written at dword index offsets[i]. The third write is
+ * issued only after the current command page has been padded with MI_NOOP.
+ *
+ * Returns 0 on success, -EIO if a relocation could not be emitted, -EINVAL
+ * if the request had not completed or a value read back is wrong, or the
+ * error from pinning, flushing or waiting.
+ */
+static int __igt_gpu_reloc(struct i915_execbuffer *eb,
+                          struct drm_i915_gem_object *obj)
+{
+       const unsigned int offsets[] = { 8, 3, 0 };
+       /* Compare 64 or 32 bits of each value, following use_64bit_reloc */
+       const u64 mask =
+               GENMASK_ULL(eb->reloc_cache.use_64bit_reloc ? 63 : 31, 0);
+       const u32 *map = page_mask_bits(obj->mm.mapping);
+       struct i915_request *rq;
+       struct i915_vma *vma;
+       int err;
+       int i;
+
+       vma = i915_vma_instance(obj, eb->context->vm, NULL);
+       if (IS_ERR(vma))
+               return PTR_ERR(vma);
+
+       err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_HIGH);
+       if (err)
+               return err;
+
+       /* 8-Byte aligned */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[0] * sizeof(u32),
+                              0)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       /* !8-Byte aligned */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[1] * sizeof(u32),
+                              1)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       /* Skip to the end of the cmd page */
+       i = PAGE_SIZE / sizeof(u32) - RELOC_TAIL - 1;
+       i -= eb->reloc_cache.rq_size;
+       memset32(eb->reloc_cache.rq_cmd + eb->reloc_cache.rq_size,
+                MI_NOOP, i);
+       eb->reloc_cache.rq_size += i;
+
+       /* Force batch chaining */
+       if (!__reloc_entry_gpu(eb, vma,
+                              offsets[2] * sizeof(u32),
+                              2)) {
+               err = -EIO;
+               goto unpin_vma;
+       }
+
+       /*
+        * Take our own reference before flushing: reloc_cache.rq is expected
+        * to be cleared by a successful flush (asserted just below).
+        */
+       GEM_BUG_ON(!eb->reloc_cache.rq);
+       rq = i915_request_get(eb->reloc_cache.rq);
+       err = reloc_gpu_flush(&eb->reloc_cache);
+       if (err)
+               goto put_rq;
+       GEM_BUG_ON(eb->reloc_cache.rq);
+
+       /* Wait on the object with a HZ / 2 timeout; wedge the GT on failure */
+       err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE, HZ / 2);
+       if (err) {
+               intel_gt_set_wedged(eb->engine->gt);
+               goto put_rq;
+       }
+
+       if (!i915_request_completed(rq)) {
+               pr_err("%s: did not wait for relocations!\n", eb->engine->name);
+               err = -EINVAL;
+               goto put_rq;
+       }
+
+       /* Check every slot; keep going after a mismatch to report them all */
+       for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+               u64 reloc = read_reloc(map, offsets[i], mask);
+
+               if (reloc != i) {
+                       pr_err("%s[%d]: map[%d] %llx != %x\n",
+                              eb->engine->name, i, offsets[i], reloc, i);
+                       err = -EINVAL;
+               }
+       }
+       if (err)
+               igt_hexdump(map, 4096);
+
+put_rq:
+       i915_request_put(rq);
+unpin_vma:
+       i915_vma_unpin(vma);
+       return err;
+}
+
+/*
+ * Live selftest: run __igt_gpu_reloc() once per uabi engine against a
+ * single 4096-byte internal scratch object.
+ *
+ * @arg is stored in eb.i915. Returns 0 on success, the first error hit
+ * while iterating the engines, or -EIO if igt_flush_test() fails.
+ *
+ * NOTE(review): eb lives on the stack and only its i915, engine, context
+ * and reloc_cache members are set here -- confirm no callee reads others.
+ * NOTE(review): no unpin of the scratch mapping is visible in this function
+ * before the object is put -- confirm that is intended.
+ */
+static int igt_gpu_reloc(void *arg)
+{
+       struct i915_execbuffer eb;
+       struct drm_i915_gem_object *scratch;
+       int err = 0;
+       u32 *map;
+
+       eb.i915 = arg;
+
+       scratch = i915_gem_object_create_internal(eb.i915, 4096);
+       if (IS_ERR(scratch))
+               return PTR_ERR(scratch);
+
+       map = i915_gem_object_pin_map(scratch, I915_MAP_WC);
+       if (IS_ERR(map)) {
+               err = PTR_ERR(map);
+               goto err_scratch;
+       }
+
+       for_each_uabi_engine(eb.engine, eb.i915) {
+               /* Fresh reloc cache and re-poisoned scratch for every engine */
+               reloc_cache_init(&eb.reloc_cache, eb.i915);
+               memset(map, POISON_INUSE, 4096);
+
+               intel_engine_pm_get(eb.engine);
+               eb.context = intel_context_create(eb.engine);
+               if (IS_ERR(eb.context)) {
+                       err = PTR_ERR(eb.context);
+                       goto err_pm;
+               }
+
+               err = intel_context_pin(eb.context);
+               if (err)
+                       goto err_put;
+
+               err = __igt_gpu_reloc(&eb, scratch);
+
+               intel_context_unpin(eb.context);
+               /*
+                * The labels sit inside the loop body: each iteration drops
+                * its context and pm reference before err is examined.
+                */
+err_put:
+               intel_context_put(eb.context);
+err_pm:
+               intel_engine_pm_put(eb.engine);
+               if (err)
+                       break;
+       }
+
+       if (igt_flush_test(eb.i915))
+               err = -EIO;
+
+err_scratch:
+       i915_gem_object_put(scratch);
+       return err;
+}
+
+/*
+ * Entry point for the execbuffer live selftests. When the GT is already
+ * wedged nothing is run and 0 is returned.
+ */
+int i915_gem_execbuffer_live_selftests(struct drm_i915_private *i915)
+{
+       static const struct i915_subtest tests[] = {
+               SUBTEST(igt_gpu_reloc),
+       };
+
+       if (!intel_gt_is_wedged(&i915->gt))
+               return i915_live_subtests(tests, i915);
+
+       return 0;
+}
 
        return intel_gt_live_subtests(tests, &i915->gt);
 }
 
-static void hexdump(const void *buf, size_t len)
-{
-       const size_t rowsize = 8 * sizeof(u32);
-       const void *prev = NULL;
-       bool skip = false;
-       size_t pos;
-
-       for (pos = 0; pos < len; pos += rowsize) {
-               char line[128];
-
-               if (prev && !memcmp(prev, buf + pos, rowsize)) {
-                       if (!skip) {
-                               pr_info("*\n");
-                               skip = true;
-                       }
-                       continue;
-               }
-
-               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
-                                               rowsize, sizeof(u32),
-                                               line, sizeof(line),
-                                               false) >= sizeof(line));
-               pr_info("[%04zx] %s\n", pos, line);
-
-               prev = buf + pos;
-               skip = false;
-       }
-}
-
 static int emit_semaphore_signal(struct intel_context *ce, void *slot)
 {
        const u32 offset =
 
                if (err) {
                        pr_info("%s: HW register image:\n", engine->name);
-                       hexdump(hw, PAGE_SIZE);
+                       igt_hexdump(hw, PAGE_SIZE);
 
                        pr_info("%s: SW register image:\n", engine->name);
-                       hexdump(lrc, PAGE_SIZE);
+                       igt_hexdump(lrc, PAGE_SIZE);
                }
 
                shmem_unpin_map(engine->default_state, hw);
 
 #define igt_timeout(t, fmt, ...) \
        __igt_timeout((t), KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__)
 
+void igt_hexdump(const void *buf, size_t len);
+
 #endif /* !__I915_SELFTEST_H__ */
 
 selftest(evict, i915_gem_evict_live_selftests)
 selftest(hugepages, i915_gem_huge_page_live_selftests)
 selftest(gem_contexts, i915_gem_context_live_selftests)
+selftest(gem_execbuf, i915_gem_execbuffer_live_selftests)
 selftest(blt, i915_gem_object_blt_live_selftests)
 selftest(client, i915_gem_client_blt_live_selftests)
 selftest(reset, intel_reset_live_selftests)
 
        return true;
 }
 
+/*
+ * Print @len bytes starting at @buf to the kernel log as rows of eight
+ * 32-bit words, each row prefixed with its byte offset into @buf.
+ *
+ * A complete row identical to the row before it is not printed; a single
+ * "*" line stands in for the whole run of repeats.
+ */
+void igt_hexdump(const void *buf, size_t len)
+{
+       const size_t rowsize = 8 * sizeof(u32);
+       const void *prev = NULL;
+       bool skip = false;
+       size_t pos;
+
+       for (pos = 0; pos < len; pos += rowsize) {
+               char line[128];
+
+               /*
+                * Only a complete row may be folded into the previous one:
+                * comparing a short final row over rowsize bytes would read
+                * past the end of @buf.
+                */
+               if (prev && len - pos >= rowsize &&
+                   !memcmp(prev, buf + pos, rowsize)) {
+                       if (!skip) {
+                               pr_info("*\n");
+                               skip = true;
+                       }
+                       continue;
+               }
+
+               /* A row that does not fit in line[] would be cut short */
+               WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
+                                               rowsize, sizeof(u32),
+                                               line, sizeof(line),
+                                               false) >= sizeof(line));
+               pr_info("[%04zx] %s\n", pos, line);
+
+               prev = buf + pos;
+               skip = false;
+       }
+}
+
 module_param_named(st_random_seed, i915_selftest.random_seed, uint, 0400);
 module_param_named(st_timeout, i915_selftest.timeout_ms, uint, 0400);
 module_param_named(st_filter, i915_selftest.filter, charp, 0400);