if (r)
                return r;
 
-       r = amdgpu_sync_vm_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
+       r = amdgpu_sync_fence(&p->job->sync, fpriv->prt_va->last_pt_update);
        if (r)
                return r;
 
                if (r)
                        return r;
 
-               r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+               r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
                if (r)
                        return r;
        }
                if (r)
                        return r;
 
-               r = amdgpu_sync_vm_fence(&p->job->sync, bo_va->last_pt_update);
+               r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
                if (r)
                        return r;
        }
        if (r)
                return r;
 
-       r = amdgpu_sync_vm_fence(&p->job->sync, vm->last_update);
+       r = amdgpu_sync_fence(&p->job->sync, vm->last_update);
        if (r)
                return r;
 
 
        unsigned vmhub = ring->funcs->vmhub;
        uint64_t fence_context = adev->fence_context + ring->idx;
        bool needs_flush = vm->use_cpu_for_update;
-       uint64_t updates = sync->last_vm_update;
+       uint64_t updates = amdgpu_vm_tlb_seq(vm);
        int r;
 
        *id = vm->reserved_vmid[vmhub];
        unsigned vmhub = ring->funcs->vmhub;
        struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
        uint64_t fence_context = adev->fence_context + ring->idx;
-       uint64_t updates = sync->last_vm_update;
+       uint64_t updates = amdgpu_vm_tlb_seq(vm);
        int r;
 
        job->vm_needs_flush = vm->use_cpu_for_update;
                        if (r)
                                goto error;
 
-                       id->flushed_updates = sync->last_vm_update;
+                       id->flushed_updates = amdgpu_vm_tlb_seq(vm);
                        job->vm_needs_flush = true;
                }
 
 
 void amdgpu_sync_create(struct amdgpu_sync *sync)
 {
        hash_init(sync->fences);
-       sync->last_vm_update = 0;
 }
 
 /**
        return 0;
 }
 
-/**
- * amdgpu_sync_vm_fence - remember to sync to this VM fence
- *
- * @sync: sync object to add fence to
- * @fence: the VM fence to add
- *
- * Add the fence to the sync object and remember it as VM update.
- */
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence)
-{
-       if (!fence)
-               return 0;
-
-       sync->last_vm_update = max(sync->last_vm_update, fence->seqno);
-       return amdgpu_sync_fence(sync, fence);
-}
-
 /* Determine based on the owner and mode if we should sync to a fence or not */
 static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
                                   enum amdgpu_sync_mode mode,
                }
        }
 
-       clone->last_vm_update = source->last_vm_update;
-
        return 0;
 }
 
 
  */
 struct amdgpu_sync {
        DECLARE_HASHTABLE(fences, 4);
-       uint64_t        last_vm_update;
 };
 
 void amdgpu_sync_create(struct amdgpu_sync *sync);
 int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f);
-int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence);
 int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
                     struct dma_resv *resv, enum amdgpu_sync_mode mode,
                     void *owner);
 
        struct dma_fence_cb cb;
 };
 
+/**
+ * struct amdgpu_vm_tlb_seq_cb - Helper to increment the TLB flush sequence
+ */
+struct amdgpu_vm_tlb_seq_cb {
+       /**
+        * @vm: pointer to the amdgpu_vm structure whose tlb_seq counter is
+        * incremented
+        */
+       struct amdgpu_vm *vm;
+
+       /**
+        * @cb: fence callback; the handler uses it to get back to this
+        * structure with container_of()
+        */
+       struct dma_fence_cb cb;
+};
+
 /**
  * amdgpu_vm_set_pasid - manage pasid and vm ptr mapping
  *
        return r;
 }
 
+/**
+ * amdgpu_vm_tlb_seq_cb - bump the tlb sequence of a VM
+ * @fence: unused
+ * @cb: callback member embedded in a struct amdgpu_vm_tlb_seq_cb
+ *
+ * Increments the tlb sequence of the VM so that future CS execute a VM flush
+ * and then frees the structure containing @cb.
+ */
+static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
+                                struct dma_fence_cb *cb)
+{
+       struct amdgpu_vm_tlb_seq_cb *seq_cb =
+               container_of(cb, struct amdgpu_vm_tlb_seq_cb, cb);
+
+       atomic64_inc(&seq_cb->vm->tlb_seq);
+       kfree(seq_cb);
+}
+
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
                                bool *table_freed)
 {
        struct amdgpu_vm_update_params params;
+       struct amdgpu_vm_tlb_seq_cb *tlb_cb;
        struct amdgpu_res_cursor cursor;
        enum amdgpu_sync_mode sync_mode;
        int r, idx;
        if (!drm_dev_enter(adev_to_drm(adev), &idx))
                return -ENODEV;
 
+       /*
+        * Allocate the TLB sequence callback before the eviction lock is
+        * taken. On failure only the drm_dev_enter() section is open, so
+        * leave it directly: jumping to error_unlock would release an
+        * eviction lock which has not been acquired yet.
+        */
+       tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
+       if (!tlb_cb) {
+               drm_dev_exit(idx);
+               return -ENOMEM;
+       }
+
        memset(&params, 0, sizeof(params));
        params.adev = adev;
        params.vm = vm;
        amdgpu_vm_eviction_lock(vm);
        if (vm->evicting) {
                r = -EBUSY;
-               goto error_unlock;
+               goto error_free;
        }
 
        if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
 
        r = vm->update_funcs->prepare(&params, resv, sync_mode);
        if (r)
-               goto error_unlock;
+               goto error_free;
 
        amdgpu_res_first(pages_addr ? NULL : res, offset,
                         (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, &cursor);
                tmp = start + num_entries;
                r = amdgpu_vm_ptes_update(&params, start, tmp, addr, flags);
                if (r)
-                       goto error_unlock;
+                       goto error_free;
 
                amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
                start = tmp;
 
        r = vm->update_funcs->commit(&params, fence);
 
+       if (!unlocked && (!(flags & AMDGPU_PTE_VALID) || params.table_freed)) {
+               tlb_cb->vm = vm;
+               if (!fence || !*fence ||
+                   dma_fence_add_callback(*fence, &tlb_cb->cb,
+                                          amdgpu_vm_tlb_seq_cb))
+                       amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+               tlb_cb = NULL;
+       }
+
        if (table_freed)
                *table_freed = *table_freed || params.table_freed;
 
+error_free:
+       kfree(tlb_cb);
+
 error_unlock:
        amdgpu_vm_eviction_unlock(vm);
        drm_dev_exit(idx);
 
        struct drm_sched_entity immediate;
        struct drm_sched_entity delayed;
 
+       /* TLB flush sequence, incremented when an update requires a flush */
+       atomic64_t              tlb_seq;
+
        /* Last unlocked submission to the scheduler entities */
        struct dma_fence        *last_unlocked;
 
 void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
 #endif
 
+/**
+ * amdgpu_vm_tlb_seq - return tlb flush sequence number
+ * @vm: the amdgpu_vm structure to query
+ *
+ * Returns the tlb flush sequence number. The VM TLBs need to be invalidated
+ * whenever this sequence number changes.
+ */
+static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
+{
+       return atomic64_read(&vm->tlb_seq);
+}
+
 #endif