        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
-       .invalidate_tlbs_size = 12,
+       .invalidate_tlbs_size = 2,
 };
 
 static void gfx_v10_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
-       .invalidate_tlbs_size = 12,
+       .invalidate_tlbs_size = 2,
 };
 
 static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
 
 
        if (amdgpu_emu_mode == 0 && ring->sched.ready) {
                spin_lock(&adev->gfx.kiq.ring_lock);
-               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+               /* 2 dwords flush + 8 dwords fence */
+               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
                kiq->pmf->kiq_invalidate_tlbs(ring,
                                        pasid, flush_type, all_hub);
                amdgpu_fence_emit_polling(ring, &seq);
 
 
        if (ring->sched.ready) {
                spin_lock(&adev->gfx.kiq.ring_lock);
-               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size);
+               /* 2 dwords flush + 8 dwords fence */
+               amdgpu_ring_alloc(ring, kiq->pmf->invalidate_tlbs_size + 8);
                kiq->pmf->kiq_invalidate_tlbs(ring,
                                        pasid, flush_type, all_hub);
                amdgpu_fence_emit_polling(ring, &seq);