/* set to DX10/11 mode */
        radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(ring, 1);
-       /* FIXME: implement */
+
+       if (ring->rptr_save_reg) {
+               uint32_t next_rptr = ring->wptr + 3 + 4;
+               radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(ring, ((ring->rptr_save_reg - 
+                                         PACKET3_SET_CONFIG_REG_START) >> 2));
+               radeon_ring_write(ring, next_rptr);
+       }
+
        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 
        /* set to DX10/11 mode */
        radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(ring, 1);
+
+       if (ring->rptr_save_reg) {
+               uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
+               radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(ring, ((ring->rptr_save_reg - 
+                                         PACKET3_SET_CONFIG_REG_START) >> 2));
+               radeon_ring_write(ring, next_rptr);
+       }
+
        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 
 static void cayman_cp_fini(struct radeon_device *rdev)
 {
+       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        cayman_cp_enable(rdev, false);
-       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+       radeon_ring_fini(rdev, ring); /* same GFX ring the removed line named inline */
+       radeon_scratch_free(rdev, ring->rptr_save_reg); /* new: give back the ring's rptr-save reg */
 }
 
 int cayman_cp_resume(struct radeon_device *rdev)
 
 void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size)
 {
        u32 rb_bufsz;
+       int r; /* status returned by radeon_scratch_get() */
 
        /* Align ring size */
        rb_bufsz = drm_order(ring_size / 8);
        ring_size = (1 << (rb_bufsz + 1)) * 4;
        ring->ring_size = ring_size;
        ring->align_mask = 16 - 1;
+
+       r = radeon_scratch_get(rdev, &ring->rptr_save_reg); /* reg the IB emit paths write next_rptr into */
+       if (r) { /* non-fatal: this function is void, so log it and carry on */
+               DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
+               ring->rptr_save_reg = 0; /* 0 means "no save reg"; users test the field before use */
+       }
 }
 
 void r600_cp_fini(struct radeon_device *rdev)
 {
+       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r600_cp_stop(rdev);
-       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+       radeon_ring_fini(rdev, ring); /* same GFX ring the removed line named inline */
+       radeon_scratch_free(rdev, ring->rptr_save_reg); /* presumably the reg taken in r600_ring_init;
+                                                        * NOTE(review): the field is 0 when that
+                                                        * allocation failed - confirm
+                                                        * radeon_scratch_free() tolerates 0 */
 }
 
 
 {
        struct radeon_ring *ring = &rdev->ring[ib->ring];
 
-       /* FIXME: implement */
+       if (ring->rptr_save_reg) {
+               uint32_t next_rptr = ring->wptr + 3 + 4;
+               radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(ring, ((ring->rptr_save_reg -
+                                        PACKET3_SET_CONFIG_REG_OFFSET) >> 2));
+               radeon_ring_write(ring, next_rptr);
+       }
+
        radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(ring,
 #ifdef __BIG_ENDIAN
 
        unsigned                rptr;
        unsigned                rptr_offs;
        unsigned                rptr_reg;
+       unsigned                rptr_save_reg;
        unsigned                wptr;
        unsigned                wptr_old;
        unsigned                wptr_reg;
 
        count = (ring->ring_size / 4) - ring->ring_free_dw;
        seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg));
        seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg));
+       if (ring->rptr_save_reg) {
+               seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg,
+                          RREG32(ring->rptr_save_reg));
+       }
        seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr);
        seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr);
        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
 
 
 void r700_cp_fini(struct radeon_device *rdev)
 {
+       struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
        r700_cp_stop(rdev);
-       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+       radeon_ring_fini(rdev, ring); /* same GFX ring the removed line named inline */
+       radeon_scratch_free(rdev, ring->rptr_save_reg); /* new: give back the ring's rptr-save reg */
 }
 
 /*
 
        struct radeon_ring *ring = &rdev->ring[ib->ring];
        u32 header;
 
+       if (ring->rptr_save_reg) {
+               uint32_t next_rptr = ring->wptr + 3 + 4 + 8;
+               radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               radeon_ring_write(ring, ((ring->rptr_save_reg - 
+                                         PACKET3_SET_CONFIG_REG_START) >> 2));
+               radeon_ring_write(ring, next_rptr);
+       }
+
        if (ib->is_const_ib)
                header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
        else
 
 static void si_cp_fini(struct radeon_device *rdev)
 {
+       struct radeon_ring *ring; /* re-pointed at each of the three rings torn down below */
        si_cp_enable(rdev, false);
-       radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
-       radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+
+       ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+       radeon_ring_fini(rdev, ring); /* same order as the removed calls: GFX, CP1, CP2 */
+       radeon_scratch_free(rdev, ring->rptr_save_reg); /* each ring returns its own rptr-save reg */
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+       radeon_ring_fini(rdev, ring);
+       radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+       ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+       radeon_ring_fini(rdev, ring);
+       radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 static int si_cp_resume(struct radeon_device *rdev)