indirect1_start = 16;
        /* cp setup */
        WREG32(0x718, pre_write_timer | (pre_write_limit << 28));
-       WREG32(RADEON_CP_RB_CNTL,
-#ifdef __BIG_ENDIAN
-              RADEON_BUF_SWAP_32BIT |
-#endif
-              REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
+       tmp = (REG_SET(RADEON_RB_BUFSZ, rb_bufsz) |
               REG_SET(RADEON_RB_BLKSZ, rb_blksz) |
               REG_SET(RADEON_MAX_FETCH, max_fetch) |
               RADEON_RB_NO_UPDATE);
+#ifdef __BIG_ENDIAN
+       tmp |= RADEON_BUF_SWAP_32BIT;
+#endif
+       WREG32(RADEON_CP_RB_CNTL, tmp);
+
        /* Set ring address */
        DRM_INFO("radeon: ring at 0x%016lX\n", (unsigned long)rdev->cp.gpu_addr);
        WREG32(RADEON_CP_RB_BASE, rdev->cp.gpu_addr);
        /* Force read & write ptr to 0 */
-       tmp = RREG32(RADEON_CP_RB_CNTL);
        WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
        WREG32(RADEON_CP_RB_RPTR_WR, 0);
        WREG32(RADEON_CP_RB_WPTR, 0);
 
 
        /* Set ring buffer size */
        rb_bufsz = drm_order(rdev->cp.ring_size / 8);
+       tmp = RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
 #ifdef __BIG_ENDIAN
-       WREG32(CP_RB_CNTL, BUF_SWAP_32BIT | RB_NO_UPDATE |
-               (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz);
-#else
-       WREG32(CP_RB_CNTL, RB_NO_UPDATE | (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz);
+       tmp |= BUF_SWAP_32BIT;
 #endif
+       WREG32(CP_RB_CNTL, tmp);
        WREG32(CP_SEM_WAIT_TIMER, 0x4);
 
        /* Set the write pointer delay */
        WREG32(CP_RB_WPTR_DELAY, 0);
 
        /* Initialize the ring buffer's read and write pointers */
-       tmp = RREG32(CP_RB_CNTL);
        WREG32(CP_RB_CNTL, tmp | RB_RPTR_WR_ENA);
        WREG32(CP_RB_RPTR_WR, 0);
        WREG32(CP_RB_WPTR, 0);