radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
        radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
        ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_scratch_free(rdev, scratch);
                radeon_ib_free(rdev, &ib);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        return 0;
 }
 
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
        radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr));
        radeon_ring_write(ring, 1); /* number of DWs to follow */
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
        ib.ptr[4] = 0xDEADBEEF;
        ib.length_dw = 5;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
 
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        cp_me = 0xff;
        WREG32(CP_ME_CNTL, cp_me);
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        return 0;
 }
 
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
 
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        cayman_cp_enable(rdev, true);
 
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        /* XXX init other rings */
 
 
        if (fence) {
                r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
        }
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        return r;
 }
 
                          RADEON_ISYNC_ANY3D_IDLE2D |
                          RADEON_ISYNC_WAIT_IDLEGUI |
                          RADEON_ISYNC_CPSCRATCH_IDLEGUI);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 
        }
        radeon_ring_write(ring, PACKET0(scratch, 0));
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF) {
        ib.ptr[6] = PACKET2(0);
        ib.ptr[7] = PACKET2(0);
        ib.length_dw = 8;
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                goto free_ib;
 
        if (fence) {
                r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
        }
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        return r;
 }
 
 
        radeon_ring_write(ring,
                          R300_GEOMETRY_ROUND_NEAREST |
                          R300_COLOR_ROUND_NEAREST);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 static void r300_errata(struct radeon_device *rdev)
 
        radeon_ring_write(ring, PACKET0(R300_CP_RESYNC_ADDR, 1));
        radeon_ring_write(ring, rdev->config.r300.resync_scratch);
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 static void r420_cp_errata_fini(struct radeon_device *rdev)
        radeon_ring_lock(rdev, ring, 8);
        radeon_ring_write(ring, PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
        radeon_ring_write(ring, R300_RB3D_DC_FINISH);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_scratch_free(rdev, rdev->config.r300.resync_scratch);
 }
 
 
        radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
        radeon_ring_write(ring, 0);
        radeon_ring_write(ring, 0);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        cp_me = 0xff;
        WREG32(R_0086D8_CP_ME_CNTL, cp_me);
        radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(ring, ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(scratch);
                if (tmp == 0xDEADBEEF)
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
        ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
        ib.ptr[2] = 0xDEADBEEF;
        ib.length_dw = 3;
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                goto free_ib;
 
        radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
        radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xff);
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = readl(ptr);
        ib.ptr[3] = 0xDEADBEEF;
        ib.length_dw = 4;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
 
                  unsigned size);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-                      struct radeon_ib *const_ib);
+                      struct radeon_ib *const_ib, bool hdp_flush);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
 int radeon_ib_ring_tests(struct radeon_device *rdev);
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
 int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw);
-void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp);
-void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp);
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *cp,
+                       bool hdp_flush);
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *cp,
+                              bool hdp_flush);
 void radeon_ring_undo(struct radeon_ring *ring);
 void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
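
 For reference, the caller pattern implied by these prototypes is the usual lock/emit/commit sequence, now with an explicit flush flag. A minimal, hypothetical sketch (the ring index, dword count and packet values are placeholders mirroring the scratch-register test hunks above, not part of the patch):

	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r;

	r = radeon_ring_lock(rdev, ring, 2);	/* reserve 2 dwords */
	if (r)
		return r;
	radeon_ring_write(ring, PACKET0(scratch, 0));
	radeon_ring_write(ring, 0xDEADBEEF);
	/* kernel-generated commands pass false, as in the hunks above */
	radeon_ring_unlock_commit(rdev, ring, false);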
 
                radeon_vce_note_usage(rdev);
 
        radeon_cs_sync_rings(parser);
-       r = radeon_ib_schedule(rdev, &parser->ib, NULL);
+       r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        if (r) {
                DRM_ERROR("Failed to schedule IB !\n");
        }
 
        if ((rdev->family >= CHIP_TAHITI) &&
            (parser->chunk_const_ib_idx != -1)) {
-               r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
+               r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
        } else {
-               r = radeon_ib_schedule(rdev, &parser->ib, NULL);
+               r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
        }
 
 out:
 
  * @rdev: radeon_device pointer
  * @ib: IB object to schedule
  * @const_ib: Const IB to schedule (SI only)
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Schedule an IB on the associated ring (all asics).
  * Returns 0 on success, error on failure.
  * to SI there was just a DE IB.
  */
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
-                      struct radeon_ib *const_ib)
+                      struct radeon_ib *const_ib, bool hdp_flush)
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        int r = 0;
        if (ib->vm)
                radeon_vm_fence(rdev, ib->vm, ib->fence);
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, hdp_flush);
        return 0;
 }
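
 Read together with the call sites above, the new parameter encodes a simple convention: the command-stream ioctl paths in radeon_cs.c pass hdp_flush = true, while kernel-internal submissions (ring and IB tests, VM page-table updates, UVD/VCE helpers) pass false. A short illustrative contrast, taken from the call sites shown in the hunks:

	/* userspace command stream (radeon_cs.c): request the HDP flush */
	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);

	/* kernel-internal IB (tests, VM updates): skip the flush */
	r = radeon_ib_schedule(rdev, &ib, NULL, false);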
 
 
  *
  * @rdev: radeon_device pointer
  * @ring: radeon_ring structure holding ring information
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Update the wptr (write pointer) to tell the GPU to
  * execute new commands on the ring buffer (all asics).
  */
-void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
+void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring,
+                       bool hdp_flush)
 {
        /* If we are emitting the HDP flush via the ring buffer, we need to
         * do it before padding.
         */
-       if (rdev->asic->ring[ring->idx]->hdp_flush)
+       if (hdp_flush && rdev->asic->ring[ring->idx]->hdp_flush)
                rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
        /* We pad to match fetch size */
        while (ring->wptr & ring->align_mask) {
        /* If we are emitting the HDP flush via MMIO, we need to do it after
         * all CPU writes to VRAM finished.
         */
-       if (rdev->asic->mmio_hdp_flush)
+       if (hdp_flush && rdev->asic->mmio_hdp_flush)
                rdev->asic->mmio_hdp_flush(rdev);
        radeon_ring_set_wptr(rdev, ring);
 }
  *
  * @rdev: radeon_device pointer
  * @ring: radeon_ring structure holding ring information
+ * @hdp_flush: Whether or not to perform an HDP cache flush
  *
  * Call radeon_ring_commit() then unlock the ring (all asics).
  */
-void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring)
+void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring,
+                              bool hdp_flush)
 {
-       radeon_ring_commit(rdev, ring);
+       radeon_ring_commit(rdev, ring, hdp_flush);
        mutex_unlock(&rdev->ring_lock);
 }
 
                radeon_ring_write(ring, data[i]);
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        kfree(data);
        return 0;
 }
 
                        continue;
                }
 
-               radeon_ring_commit(rdev, &rdev->ring[i]);
+               radeon_ring_commit(rdev, &rdev->ring[i], false);
                radeon_fence_note_sync(fence, ring);
 
                semaphore->gpu_addr += 8;
 
                        return r;
                }
                radeon_fence_emit(rdev, fence, ring->idx);
-               radeon_ring_unlock_commit(rdev, ring);
+               radeon_ring_unlock_commit(rdev, ring, false);
        }
        return 0;
 }
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringA);
+       radeon_ring_unlock_commit(rdev, ringA, false);
 
        r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
        if (r)
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringA);
+       radeon_ring_unlock_commit(rdev, ringA, false);
 
        r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
        if (r)
                goto out_cleanup;
        }
        radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringB);
+       radeon_ring_unlock_commit(rdev, ringB, false);
 
        r = radeon_fence_wait(fence1, false);
        if (r) {
                goto out_cleanup;
        }
        radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringB);
+       radeon_ring_unlock_commit(rdev, ringB, false);
 
        r = radeon_fence_wait(fence2, false);
        if (r) {
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringA);
+       radeon_ring_unlock_commit(rdev, ringA, false);
 
        r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
        if (r)
                goto out_cleanup;
        }
        radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringB);
+       radeon_ring_unlock_commit(rdev, ringB, false);
        r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
        if (r)
                goto out_cleanup;
                goto out_cleanup;
        }
        radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringC);
+       radeon_ring_unlock_commit(rdev, ringC, false);
 
        for (i = 0; i < 30; ++i) {
                mdelay(100);
                goto out_cleanup;
        }
        radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore);
-       radeon_ring_unlock_commit(rdev, ringC);
+       radeon_ring_unlock_commit(rdev, ringC, false);
 
        mdelay(1000);
 
 
                ib.ptr[i] = PACKET2(0);
        ib.length_dw = 16;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r)
                goto err;
        ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
 
        for (i = ib.length_dw; i < ib_size_dw; ++i)
                ib.ptr[i] = 0x0;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
        }
        for (i = ib.length_dw; i < ib_size_dw; ++i)
                ib.ptr[i] = 0x0;
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
        }
                return r;
        }
        radeon_ring_write(ring, VCE_CMD_END);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        for (i = 0; i < rdev->usec_timeout; i++) {
                if (vce_v1_0_get_rptr(rdev, ring) != rptr)
 
        radeon_asic_vm_pad_ib(rdev, &ib);
        WARN_ON(ib.length_dw > 64);
 
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r)
                 goto error;
 
                radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
                radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
                WARN_ON(ib.length_dw > ndw);
-               r = radeon_ib_schedule(rdev, &ib, NULL);
+               r = radeon_ib_schedule(rdev, &ib, NULL, false);
                if (r) {
                        radeon_ib_free(rdev, &ib);
                        return r;
        WARN_ON(ib.length_dw > ndw);
 
        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
-       r = radeon_ib_schedule(rdev, &ib, NULL);
+       r = radeon_ib_schedule(rdev, &ib, NULL, false);
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
 
        radeon_ring_write(ring, GEOMETRY_ROUND_NEAREST | COLOR_ROUND_NEAREST);
        radeon_ring_write(ring, PACKET0(0x20C8, 0));
        radeon_ring_write(ring, 0);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 }
 
 int rv515_mc_wait_for_idle(struct radeon_device *rdev)
 
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
 
        radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
        radeon_ring_write(ring, 0xc000);
        radeon_ring_write(ring, 0xe000);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        si_cp_enable(rdev, true);
 
        radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
        radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
        for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
                ring = &rdev->ring[i];
                radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
                radeon_ring_write(ring, 0);
 
-               radeon_ring_unlock_commit(rdev, ring);
+               radeon_ring_unlock_commit(rdev, ring, false);
        }
 
        return 0;
 
                return r;
        }
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        radeon_semaphore_free(rdev, &sem, *fence);
 
        return r;
 
        radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
        radeon_ring_write(ring, 3);
 
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
 
 done:
        /* lower clocks again */
        }
        radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
        radeon_ring_write(ring, 0xDEADBEEF);
-       radeon_ring_unlock_commit(rdev, ring);
+       radeon_ring_unlock_commit(rdev, ring, false);
        for (i = 0; i < rdev->usec_timeout; i++) {
                tmp = RREG32(UVD_CONTEXT_ID);
                if (tmp == 0xDEADBEEF)