  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the CP DMA engine (CIK+).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int cik_copy_cpdma(struct radeon_device *rdev,
-                  uint64_t src_offset, uint64_t dst_offset,
-                  unsigned num_gpu_pages,
-                  struct radeon_fence **fence)
+struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
+                                   uint64_t src_offset, uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.blit_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes, control;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_bytes;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
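
With the reworked interface the copy functions hand the fence back to the caller (or an ERR_PTR on failure) instead of filling in a radeon_fence pointer. A minimal caller-side sketch of the pattern used by the benchmark, test and TTM hunks further down (the buffer object, addresses and size variables are illustrative):

        fence = radeon_copy_dma(rdev, src_gpu_addr, dst_gpu_addr,
                                size / RADEON_GPU_PAGE_SIZE, bo->tbo.resv);
        if (IS_ERR(fence))
                return PTR_ERR(fence);

        r = radeon_fence_wait(fence, false);
        radeon_fence_unref(&fence);
        if (r)
                return r;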
 
 /*
 
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (CIK).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int cik_copy_dma(struct radeon_device *rdev,
-                uint64_t src_offset, uint64_t dst_offset,
-                unsigned num_gpu_pages,
-                struct radeon_fence **fence)
+struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
+                                 uint64_t src_offset, uint64_t dst_offset,
+                                 unsigned num_gpu_pages,
+                                 struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_bytes;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
 
 /**
 
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int evergreen_copy_dma(struct radeon_device *rdev,
-                      uint64_t src_offset, uint64_t dst_offset,
-                      unsigned num_gpu_pages,
-                      struct radeon_fence **fence)
+struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
+                                       uint64_t src_offset,
+                                       uint64_t dst_offset,
+                                       unsigned num_gpu_pages,
+                                       struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_dw, cur_size_in_dw;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_dw * 4;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
 
 /**
 
        return false;
 }
 
-int r100_copy_blit(struct radeon_device *rdev,
-                  uint64_t src_offset,
-                  uint64_t dst_offset,
-                  unsigned num_gpu_pages,
-                  struct radeon_fence **fence)
+struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
+                                   uint64_t src_offset,
+                                   uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv)
 {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       struct radeon_fence *fence;
        uint32_t cur_pages;
        uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
        uint32_t pitch;
        r = radeon_ring_lock(rdev, ring, ndw);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
        }
        while (num_gpu_pages > 0) {
                cur_pages = num_gpu_pages;
                          RADEON_WAIT_2D_IDLECLEAN |
                          RADEON_WAIT_HOST_IDLECLEAN |
                          RADEON_WAIT_DMA_GUI_IDLE);
-       if (fence) {
-               r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
+       r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return ERR_PTR(r);
        }
        radeon_ring_unlock_commit(rdev, ring, false);
-       return r;
+       return fence;
 }
 
 static int r100_cp_wait_for_idle(struct radeon_device *rdev)
 
        return vtx_size;
 }
 
-int r200_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset,
-                 uint64_t dst_offset,
-                 unsigned num_gpu_pages,
-                 struct radeon_fence **fence)
+struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
+                                  uint64_t src_offset,
+                                  uint64_t dst_offset,
+                                  unsigned num_gpu_pages,
+                                  struct reservation_object *resv)
 {
        struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       struct radeon_fence *fence;
        uint32_t size;
        uint32_t cur_size;
        int i, num_loops;
        r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
        /* Must wait for 2D idle & clean before DMA or hangs might happen */
        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
        }
        radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
        radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE);
-       if (fence) {
-               r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
+       r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+       if (r) {
+               radeon_ring_unlock_undo(rdev, ring);
+               return ERR_PTR(r);
        }
        radeon_ring_unlock_commit(rdev, ring, false);
-       return r;
+       return fence;
 }
 
 
 
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int r600_copy_cpdma(struct radeon_device *rdev,
-                   uint64_t src_offset, uint64_t dst_offset,
-                   unsigned num_gpu_pages,
-                   struct radeon_fence **fence)
+struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
+                                    uint64_t src_offset, uint64_t dst_offset,
+                                    unsigned num_gpu_pages,
+                                    struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.blit_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes, tmp;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
 
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (r6xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int r600_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset, uint64_t dst_offset,
-                 unsigned num_gpu_pages,
-                 struct radeon_fence **fence)
+struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
+                                  uint64_t src_offset, uint64_t dst_offset,
+                                  unsigned num_gpu_pages,
+                                  struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_dw, cur_size_in_dw;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_dw * 4;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
 
                                  struct radeon_semaphore *semaphore);
 bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
                                struct radeon_semaphore *semaphore);
-void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
-                             struct radeon_fence *fence);
+void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
+                                struct radeon_fence *fence);
+void radeon_semaphore_sync_resv(struct radeon_semaphore *semaphore,
+                               struct reservation_object *resv,
+                               bool shared);
 int radeon_semaphore_sync_rings(struct radeon_device *rdev,
                                struct radeon_semaphore *semaphore,
                                int waiting_ring);
        } display;
        /* copy functions for bo handling */
        struct {
-               int (*blit)(struct radeon_device *rdev,
-                           uint64_t src_offset,
-                           uint64_t dst_offset,
-                           unsigned num_gpu_pages,
-                           struct radeon_fence **fence);
+               struct radeon_fence *(*blit)(struct radeon_device *rdev,
+                                            uint64_t src_offset,
+                                            uint64_t dst_offset,
+                                            unsigned num_gpu_pages,
+                                            struct reservation_object *resv);
                u32 blit_ring_index;
-               int (*dma)(struct radeon_device *rdev,
-                          uint64_t src_offset,
-                          uint64_t dst_offset,
-                          unsigned num_gpu_pages,
-                          struct radeon_fence **fence);
+               struct radeon_fence *(*dma)(struct radeon_device *rdev,
+                                           uint64_t src_offset,
+                                           uint64_t dst_offset,
+                                           unsigned num_gpu_pages,
+                                           struct reservation_object *resv);
                u32 dma_ring_index;
                /* method used for bo copy */
-               int (*copy)(struct radeon_device *rdev,
-                           uint64_t src_offset,
-                           uint64_t dst_offset,
-                           unsigned num_gpu_pages,
-                           struct radeon_fence **fence);
+               struct radeon_fence *(*copy)(struct radeon_device *rdev,
+                                            uint64_t src_offset,
+                                            uint64_t dst_offset,
+                                            unsigned num_gpu_pages,
+                                            struct reservation_object *resv);
                /* ring used for bo copies */
                u32 copy_ring_index;
        } copy;
 #define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
 #define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
 #define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
-#define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f))
-#define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f))
-#define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f))
+#define radeon_copy_blit(rdev, s, d, np, resv) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (resv))
+#define radeon_copy_dma(rdev, s, d, np, resv) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (resv))
+#define radeon_copy(rdev, s, d, np, resv) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (resv))
 #define radeon_copy_blit_ring_index(rdev) (rdev)->asic->copy.blit_ring_index
 #define radeon_copy_dma_ring_index(rdev) (rdev)->asic->copy.dma_ring_index
 #define radeon_copy_ring_index(rdev) (rdev)->asic->copy.copy_ring_index
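
"Registered as the asic copy callback" means the per-ASIC table fills in these function pointers; roughly, as a sketch based on the CIK entry (exact values differ per ASIC):

        .copy = {
                .blit = &cik_copy_cpdma,
                .blit_ring_index = RADEON_RING_TYPE_GFX_INDEX,
                .dma = &cik_copy_dma,
                .dma_ring_index = R600_RING_TYPE_DMA_INDEX,
                .copy = &cik_copy_dma,
                .copy_ring_index = R600_RING_TYPE_DMA_INDEX,
        },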
 
 int r100_cs_parse(struct radeon_cs_parser *p);
 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
-int r100_copy_blit(struct radeon_device *rdev,
-                  uint64_t src_offset,
-                  uint64_t dst_offset,
-                  unsigned num_gpu_pages,
-                  struct radeon_fence **fence);
+struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
+                                   uint64_t src_offset,
+                                   uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv);
 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
                         uint32_t tiling_flags, uint32_t pitch,
                         uint32_t offset, uint32_t obj_size);
 /*
  * r200,rv250,rs300,rv280
  */
-extern int r200_copy_dma(struct radeon_device *rdev,
-                        uint64_t src_offset,
-                        uint64_t dst_offset,
-                        unsigned num_gpu_pages,
-                        struct radeon_fence **fence);
+struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
+                                  uint64_t src_offset,
+                                  uint64_t dst_offset,
+                                  unsigned num_gpu_pages,
+                                  struct reservation_object *resv);
 void r200_set_safe_registers(struct radeon_device *rdev);
 
 /*
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
-int r600_copy_cpdma(struct radeon_device *rdev,
-                   uint64_t src_offset, uint64_t dst_offset,
-                   unsigned num_gpu_pages, struct radeon_fence **fence);
-int r600_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset, uint64_t dst_offset,
-                 unsigned num_gpu_pages, struct radeon_fence **fence);
+struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
+                                    uint64_t src_offset, uint64_t dst_offset,
+                                    unsigned num_gpu_pages,
+                                    struct reservation_object *resv);
+struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
+                                  uint64_t src_offset, uint64_t dst_offset,
+                                  unsigned num_gpu_pages,
+                                  struct reservation_object *resv);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 void r700_cp_stop(struct radeon_device *rdev);
 void r700_cp_fini(struct radeon_device *rdev);
-int rv770_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset, uint64_t dst_offset,
-                 unsigned num_gpu_pages,
-                  struct radeon_fence **fence);
+struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
+                                   uint64_t src_offset, uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv);
 u32 rv770_get_xclk(struct radeon_device *rdev);
 int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 int rv770_get_temp(struct radeon_device *rdev);
                                   struct radeon_fence *fence);
 void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
                                   struct radeon_ib *ib);
-int evergreen_copy_dma(struct radeon_device *rdev,
-                      uint64_t src_offset, uint64_t dst_offset,
-                      unsigned num_gpu_pages,
-                      struct radeon_fence **fence);
+struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
+                                       uint64_t src_offset, uint64_t dst_offset,
+                                       unsigned num_gpu_pages,
+                                       struct reservation_object *resv);
 void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable);
 void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
 int evergreen_get_temp(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
-int si_copy_dma(struct radeon_device *rdev,
-               uint64_t src_offset, uint64_t dst_offset,
-               unsigned num_gpu_pages,
-               struct radeon_fence **fence);
+struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
+                                uint64_t src_offset, uint64_t dst_offset,
+                                unsigned num_gpu_pages,
+                                struct reservation_object *resv);
 
 void si_dma_vm_copy_pages(struct radeon_device *rdev,
                          struct radeon_ib *ib,
                                  struct radeon_semaphore *semaphore,
                                  bool emit_wait);
 void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int cik_copy_dma(struct radeon_device *rdev,
-                uint64_t src_offset, uint64_t dst_offset,
-                unsigned num_gpu_pages,
-                struct radeon_fence **fence);
-int cik_copy_cpdma(struct radeon_device *rdev,
-                  uint64_t src_offset, uint64_t dst_offset,
-                  unsigned num_gpu_pages,
-                  struct radeon_fence **fence);
+struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
+                                 uint64_t src_offset, uint64_t dst_offset,
+                                 unsigned num_gpu_pages,
+                                 struct reservation_object *resv);
+struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
+                                   uint64_t src_offset, uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv);
 int cik_sdma_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 
        for (i = 0; i < n; i++) {
                switch (flag) {
                case RADEON_BENCHMARK_COPY_DMA:
-                       r = radeon_copy_dma(rdev, saddr, daddr,
-                                           size / RADEON_GPU_PAGE_SIZE,
-                                           &fence);
+                       fence = radeon_copy_dma(rdev, saddr, daddr,
+                                               size / RADEON_GPU_PAGE_SIZE,
+                                               NULL);
                        break;
                case RADEON_BENCHMARK_COPY_BLIT:
-                       r = radeon_copy_blit(rdev, saddr, daddr,
-                                            size / RADEON_GPU_PAGE_SIZE,
-                                            &fence);
+                       fence = radeon_copy_blit(rdev, saddr, daddr,
+                                                size / RADEON_GPU_PAGE_SIZE,
+                                                NULL);
                        break;
                default:
                        DRM_ERROR("Unknown copy method\n");
-                       r = -EINVAL;
+                       return -EINVAL;
                }
-               if (r)
-                       goto exit_do_move;
+               if (IS_ERR(fence))
+                       return PTR_ERR(fence);
+
                r = radeon_fence_wait(fence, false);
-               if (r)
-                       goto exit_do_move;
                radeon_fence_unref(&fence);
+               if (r)
+                       return r;
        }
        end_jiffies = jiffies;
-       r = jiffies_to_msecs(end_jiffies - start_jiffies);
-
-exit_do_move:
-       if (fence)
-               radeon_fence_unref(&fence);
-       return r;
+       return jiffies_to_msecs(end_jiffies - start_jiffies);
 }
 
 
 
 
        for (i = 0; i < p->nrelocs; i++) {
                struct reservation_object *resv;
-               struct fence *fence;
 
                if (!p->relocs[i].robj)
                        continue;
 
                resv = p->relocs[i].robj->tbo.resv;
-               fence = reservation_object_get_excl(resv);
-
-               radeon_semaphore_sync_to(p->ib.semaphore,
-                                        (struct radeon_fence *)fence);
+               radeon_semaphore_sync_resv(p->ib.semaphore, resv, false);
        }
 }
 
                goto out;
        }
        radeon_cs_sync_rings(parser);
-       radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
+       radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
 
        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
 
        if (ib->vm) {
                struct radeon_fence *vm_id_fence;
                vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
-               radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
+               radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
        }
 
        /* sync with other rings */
 
 }
 
 /**
- * radeon_semaphore_sync_to - use the semaphore to sync to a fence
+ * radeon_semaphore_sync_fence - use the semaphore to sync to a fence
  *
  * @semaphore: semaphore object to add fence to
  * @fence: fence to sync to
  *
  * Sync to the fence using this semaphore object
  */
-void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
-                             struct radeon_fence *fence)
+void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
+                                struct radeon_fence *fence)
 {
         struct radeon_fence *other;
 
         semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other);
 }
 
+/**
+ * radeon_semaphore_sync_resv - use the semaphore to sync to a reservation object
+ *
+ * @sema: semaphore object to add fences from the reservation object to
+ * @resv: reservation object with embedded fences
+ * @shared: true if we should only sync to the exclusive fence
+ *
+ * Sync to all fences of the reservation object using this semaphore.
+ */
+void radeon_semaphore_sync_resv(struct radeon_semaphore *sema,
+                               struct reservation_object *resv,
+                               bool shared)
+{
+       struct reservation_object_list *flist;
+       struct fence *f;
+       unsigned i;
+
+       /* always sync to the exclusive fence */
+       f = reservation_object_get_excl(resv);
+       radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
+
+       flist = reservation_object_get_list(resv);
+       if (shared || !flist)
+               return;
+
+       for (i = 0; i < flist->shared_count; ++i) {
+               f = rcu_dereference_protected(flist->shared[i],
+                                             reservation_object_held(resv));
+               radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
+       }
+}
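
The new helper is what the CS and VM paths in this patch use to pick up every fence attached to a BO before scheduling work. A minimal sketch (the IB and BO variables are illustrative):

        /* shared=false: wait for the shared fences as well, not just the
         * exclusive one, before the IB runs
         */
        radeon_semaphore_sync_resv(ib->semaphore, bo->tbo.resv, false);
        radeon_semaphore_sync_fence(ib->semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, ib, NULL, false);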
+
 /**
  * radeon_semaphore_sync_rings - sync ring to all registered fences
  *
 
                radeon_bo_kunmap(gtt_obj[i]);
 
                if (ring == R600_RING_TYPE_DMA_INDEX)
-                       r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+                       fence = radeon_copy_dma(rdev, gtt_addr, vram_addr,
+                                               size / RADEON_GPU_PAGE_SIZE,
+                                               NULL);
                else
-                       r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
-               if (r) {
+                       fence = radeon_copy_blit(rdev, gtt_addr, vram_addr,
+                                                size / RADEON_GPU_PAGE_SIZE,
+                                                NULL);
+               if (IS_ERR(fence)) {
                        DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
+                       r = PTR_ERR(fence);
                        goto out_lclean_unpin;
                }
 
                radeon_bo_kunmap(vram_obj);
 
                if (ring == R600_RING_TYPE_DMA_INDEX)
-                       r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+                       fence = radeon_copy_dma(rdev, vram_addr, gtt_addr,
+                                               size / RADEON_GPU_PAGE_SIZE,
+                                               NULL);
                else
-                       r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
-               if (r) {
+                       fence = radeon_copy_blit(rdev, vram_addr, gtt_addr,
+                                                size / RADEON_GPU_PAGE_SIZE,
+                                                NULL);
+               if (IS_ERR(fence)) {
                        DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
+                       r = PTR_ERR(fence);
                        goto out_lclean_unpin;
                }
 
                        radeon_bo_unreserve(gtt_obj[i]);
                        radeon_bo_unref(&gtt_obj[i]);
                }
-               if (fence)
+               if (fence && !IS_ERR(fence))
                        radeon_fence_unref(&fence);
                break;
        }
 
        struct radeon_device *rdev;
        uint64_t old_start, new_start;
        struct radeon_fence *fence;
+       unsigned num_pages;
        int r, ridx;
 
        rdev = radeon_get_rdev(bo->bdev);
 
        BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
 
-       /* sync other rings */
-       fence = (struct radeon_fence *)reservation_object_get_excl(bo->resv);
-       r = radeon_copy(rdev, old_start, new_start,
-                       new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
-                       &fence);
-       /* FIXME: handle copy error */
+       num_pages = new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+       fence = radeon_copy(rdev, old_start, new_start, num_pages, bo->resv);
+       if (IS_ERR(fence))
+               return PTR_ERR(fence);
+
        r = ttm_bo_move_accel_cleanup(bo, &fence->base,
                                      evict, no_wait_gpu, new_mem);
        radeon_fence_unref(&fence);
 
                                    incr, R600_PTE_VALID);
 
        if (ib.length_dw != 0) {
-               struct fence *fence;
-
                radeon_asic_vm_pad_ib(rdev, &ib);
 
-               fence = reservation_object_get_excl(pd->tbo.resv);
-               radeon_semaphore_sync_to(ib.semaphore,
-                                        (struct radeon_fence *)fence);
-
-               radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
+               radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false);
+               radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
                WARN_ON(ib.length_dw > ndw);
                r = radeon_ib_schedule(rdev, &ib, NULL, false);
                if (r) {
                struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
                unsigned nptes;
                uint64_t pte;
-               struct fence *fence;
 
-               fence = reservation_object_get_excl(pt->tbo.resv);
-               radeon_semaphore_sync_to(ib->semaphore,
-                                        (struct radeon_fence *)fence);
+               radeon_semaphore_sync_resv(ib->semaphore, pt->tbo.resv, false);
 
                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > ndw);
 
-       radeon_semaphore_sync_to(ib.semaphore, vm->fence);
+       radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
 
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (r7xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int rv770_copy_dma(struct radeon_device *rdev,
-                 uint64_t src_offset, uint64_t dst_offset,
-                 unsigned num_gpu_pages,
-                 struct radeon_fence **fence)
+struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
+                                   uint64_t src_offset, uint64_t dst_offset,
+                                   unsigned num_gpu_pages,
+                                   struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_dw, cur_size_in_dw;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_dw * 4;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }
 
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (SI).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int si_copy_dma(struct radeon_device *rdev,
-               uint64_t src_offset, uint64_t dst_offset,
-               unsigned num_gpu_pages,
-               struct radeon_fence **fence)
+struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
+                                uint64_t src_offset, uint64_t dst_offset,
+                                unsigned num_gpu_pages,
+                                struct reservation_object *resv)
 {
        struct radeon_semaphore *sem = NULL;
+       struct radeon_fence *fence;
        int ring_index = rdev->asic->copy.dma_ring_index;
        struct radeon_ring *ring = &rdev->ring[ring_index];
        u32 size_in_bytes, cur_size_in_bytes;
        r = radeon_semaphore_create(rdev, &sem);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
-               return r;
+               return ERR_PTR(r);
        }
 
        size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
        if (r) {
                DRM_ERROR("radeon: moving bo (%d).\n", r);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
-       radeon_semaphore_sync_to(sem, *fence);
+       radeon_semaphore_sync_resv(sem, resv, false);
        radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
        for (i = 0; i < num_loops; i++) {
                dst_offset += cur_size_in_bytes;
        }
 
-       r = radeon_fence_emit(rdev, fence, ring->idx);
+       r = radeon_fence_emit(rdev, &fence, ring->idx);
        if (r) {
                radeon_ring_unlock_undo(rdev, ring);
                radeon_semaphore_free(rdev, &sem, NULL);
-               return r;
+               return ERR_PTR(r);
        }
 
        radeon_ring_unlock_commit(rdev, ring, false);
-       radeon_semaphore_free(rdev, &sem, *fence);
+       radeon_semaphore_free(rdev, &sem, fence);
 
-       return r;
+       return fence;
 }