bool                    preamble_presented;
        enum amd_sched_priority init_priority;
        enum amd_sched_priority override_priority;
+       struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
                     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
 
                goto free_chunk;
        }
 
+       mutex_lock(&p->ctx->lock);
+
        /* get chunks */
        chunk_array_user = u64_to_user_ptr(cs->in.chunks);
        if (copy_from_user(chunk_array, chunk_array_user,
 
        dma_fence_put(parser->fence);
 
-       if (parser->ctx)
+       if (parser->ctx) {
+               mutex_unlock(&parser->ctx->lock);
                amdgpu_ctx_put(parser->ctx);
+       }
        if (parser->bo_list)
                amdgpu_bo_list_put(parser->bo_list);
 
                        r = amdgpu_ring_parse_cs(ring, p, j);
                        if (r)
                                return r;
-
                }
-
                j++;
        }
 
            parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
                return -EINVAL;
 
-       return 0;
+       return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
 
        if (!ctx->fences)
                return -ENOMEM;
 
+       mutex_init(&ctx->lock);
+
        for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
                ctx->rings[i].sequence = 1;
                ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
                                      &ctx->rings[i].entity);
 
        amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+       mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 
        idx = seq & (amdgpu_sched_jobs - 1);
        other = cring->fences[idx];
-       if (other) {
-               signed long r;
-               r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-               if (r < 0)
-                       return r;
-       }
+       if (other)
+               BUG_ON(!dma_fence_is_signaled(other));
 
        dma_fence_get(fence);
 
        }
 }
 
+/**
+ * amdgpu_ctx_wait_prev_fence - wait for the fence occupying the next slot
+ * @ctx: context whose per-ring fence array is inspected
+ * @ring_id: index into @ctx->rings; not range-checked here
+ *
+ * Looks up the fence stored at slot (sequence & (amdgpu_sched_jobs - 1)) of
+ * the ring's fence array and, if one is present, blocks until it signals.
+ * NOTE(review): the mask only works as a modulo if amdgpu_sched_jobs is a
+ * power of two - confirm against where that parameter is validated.
+ *
+ * The wait has no timeout (MAX_SCHEDULE_TIMEOUT), so it is interruptible:
+ * otherwise a fence that never signals would leave the task unkillable.
+ *
+ * Return: 0 if the slot was empty or the fence signaled, otherwise the
+ * negative error from dma_fence_wait_timeout() (-ERESTARTSYS when a signal
+ * interrupted the wait).
+ */
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+       struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+       unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+       struct dma_fence *other = cring->fences[idx];
+
+       if (other) {
+               signed long r;
+
+               r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
+               if (r < 0) {
+                       /* Being interrupted by a signal is not a failure. */
+                       if (r != -ERESTARTSYS)
+                               DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+                       return r;
+               }
+       }
+
+       return 0;
+}
+
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
        mutex_init(&mgr->lock);