{
        uint32_t dw2 = 0;
 
+       if (amdgpu_sriov_vf(ring->adev))
+               gfx_v8_0_ring_emit_ce_meta_init(ring,
+                       (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
+
        dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
        if (flags & AMDGPU_HAVE_CTX_SWITCH) {
                gfx_v8_0_ring_emit_vgt_flush(ring);
        amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
        amdgpu_ring_write(ring, dw2);
        amdgpu_ring_write(ring, 0);
+
+       if (amdgpu_sriov_vf(ring->adev))
+               gfx_v8_0_ring_emit_de_meta_init(ring,
+                       (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr);
 }
 
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
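Aside (not part of the patch): both new calls pass the same context-save-area address, chosen by the ternary in the hunks above. A minimal sketch of that selection, with a hypothetical helper name made up here purely for illustration:

static uint64_t csa_gpu_addr(struct amdgpu_ring *ring, uint32_t flags)
{
	/* Hypothetical helper: restates the argument passed to
	 * gfx_v8_0_ring_emit_ce_meta_init()/_de_meta_init() above.
	 * A per-process VM submission (AMDGPU_VM_DOMAIN) uses the fixed
	 * CSA virtual address; otherwise fall back to the VMID0 mapping
	 * kept under adev->virt. */
	return (flags & AMDGPU_VM_DOMAIN) ?
		AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr;
}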
                7 + /* gfx_v8_0_ring_emit_pipeline_sync */
                128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
                2 + /* gfx_v8_ring_emit_sb */
-               3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
+               3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */
        .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
        .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
        .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
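The extra 29 dwords in .emit_frame_size cover the worst case added by the CE/DE meta-data writes. A rough accounting sketch, using only the counts stated in the comments of this hunk (the enum names are made up for illustration):

enum {
	GFX_V8_CNTXCNTL_DWS  = 3,	/* PACKET3_CONTEXT_CONTROL: header + dw2 + 0 */
	GFX_V8_VGT_FLUSH_DWS = 4,	/* gfx_v8_0_ring_emit_vgt_flush()            */
	GFX_V8_META_INIT_DWS = 29,	/* CE + DE meta-data init, SR-IOV only       */
	/* worst case reserved per frame: 3 + 4 + 29 = 36 dwords */
};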