void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
-int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence);
 void radeon_fence_unref(struct radeon_fence **fence);
 unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring);
 
 }
 
 static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq,
-                                unsigned ring, bool intr)
+                                unsigned ring, bool intr, bool lock_ring)
 {
        unsigned long timeout, last_activity;
        uint64_t seq;
                        if (seq != atomic64_read(&rdev->fence_drv[ring].last_seq)) {
                                continue;
                        }
+
+                       if (lock_ring) {
+                               mutex_lock(&rdev->ring_lock);
+                       }
+
                        /* test if somebody else has already decided that this is a lockup */
                        if (last_activity != rdev->fence_drv[ring].last_activity) {
+                               if (lock_ring) {
+                                       mutex_unlock(&rdev->ring_lock);
+                               }
                                continue;
                        }
 
                                        rdev->fence_drv[i].last_activity = jiffies;
                                }
 
-                               /* change last activity so nobody else think there is a lockup */
-                               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-                                       rdev->fence_drv[i].last_activity = jiffies;
-                               }
-
                                /* mark the ring as not ready any more */
                                rdev->ring[ring].ready = false;
+                               if (lock_ring) {
+                                       mutex_unlock(&rdev->ring_lock);
+                               }
                                return -EDEADLK;
                        }
+
+                       if (lock_ring) {
+                               mutex_unlock(&rdev->ring_lock);
+                       }
                }
        }
        return 0;
                return -EINVAL;
        }
 
-       r = radeon_fence_wait_seq(fence->rdev, fence->seq, fence->ring, intr);
+       r = radeon_fence_wait_seq(fence->rdev, fence->seq,
+                                 fence->ring, intr, true);
        if (r) {
                return r;
        }
        return 0;
 }
 
-int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 {
        uint64_t seq;
 
         */
        seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
        if (seq >= rdev->fence_drv[ring].seq) {
-               /* nothing to wait for, last_seq is already the last emited fence */
-               return 0;
+               /* nothing to wait for, last_seq is
+                  already the last emitted fence */
+               return -ENOENT;
        }
-       return radeon_fence_wait_seq(rdev, seq, ring, false);
+       return radeon_fence_wait_seq(rdev, seq, ring, false, false);
 }
 
-int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
        /* We are not protected by ring lock when reading current seq
         * but it's ok as wait empty is call from place where no more
         * activity can be scheduled so there won't be concurrent access
         * to seq value.
         */
-       return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, ring, false);
+       return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq,
+                                    ring, false, false);
 }
 
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
        int ring;
 
+       mutex_lock(&rdev->ring_lock);
        for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
                if (!rdev->fence_drv[ring].initialized)
                        continue;
-               radeon_fence_wait_empty(rdev, ring);
+               radeon_fence_wait_empty_locked(rdev, ring);
                wake_up_all(&rdev->fence_drv[ring].queue);
                radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
                rdev->fence_drv[ring].initialized = false;
        }
+       mutex_unlock(&rdev->ring_lock);
 }
 
 
 
        } else {
                struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
                if (ring->ready) {
-                       struct radeon_fence *fence;
-                       radeon_ring_alloc(rdev, ring, 64);
-                       radeon_fence_create(rdev, &fence, radeon_ring_index(rdev, ring));
-                       radeon_fence_emit(rdev, fence);
-                       radeon_ring_commit(rdev, ring);
-                       radeon_fence_wait(fence, false);
-                       radeon_fence_unref(&fence);
+                       radeon_fence_wait_empty_locked(rdev, RADEON_RING_TYPE_GFX_INDEX);
                }
        }
        radeon_unmap_vram_bos(rdev);
 
                if (ndw < ring->ring_free_dw) {
                        break;
                }
-               mutex_unlock(&rdev->ring_lock);
-               r = radeon_fence_wait_next(rdev, radeon_ring_index(rdev, ring));
-               mutex_lock(&rdev->ring_lock);
+               r = radeon_fence_wait_next_locked(rdev, radeon_ring_index(rdev, ring));
                if (r)
                        return r;
        }
 {
        int r;
 
-       mutex_lock(&rdev->ring_lock);
        radeon_ring_free_size(rdev, ring);
        if (ring->rptr == ring->wptr) {
                r = radeon_ring_alloc(rdev, ring, 1);
                        radeon_ring_commit(rdev, ring);
                }
        }
-       mutex_unlock(&rdev->ring_lock);
 }
 
 void radeon_ring_lockup_update(struct radeon_ring *ring)