ps = (u32 *) ((char *)dev->agp_buffer_map->handle + dev_priv->blit_vb->offset + 256);
 
        for (i = 0; i < r6xx_vs_size; i++)
-               vs[i] = r6xx_vs[i];
+               vs[i] = cpu_to_le32(r6xx_vs[i]);
        for (i = 0; i < r6xx_ps_size; i++)
-               ps[i] = r6xx_ps[i];
+               ps[i] = cpu_to_le32(r6xx_ps[i]);
 
        dev_priv->blit_vb->used = 512;
 
        DRM_DEBUG("\n");
 
        sq_vtx_constant_word2 = (((gpu_addr >> 32) & 0xff) | (16 << 8));
+#ifdef __BIG_ENDIAN
+       sq_vtx_constant_word2 |= (2 << 30);
+#endif
 
        BEGIN_RING(9);
        OUT_RING(CP_PACKET3(R600_IT_SET_RESOURCE, 7));
        OUT_RING(DI_PT_RECTLIST);
 
        OUT_RING(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+#ifdef __BIG_ENDIAN
+       OUT_RING((2 << 2) | DI_INDEX_SIZE_16_BIT);
+#else
        OUT_RING(DI_INDEX_SIZE_16_BIT);
+#endif
 
        OUT_RING(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
        OUT_RING(1);
 
        r600_do_cp_stop(dev_priv);
 
        RADEON_WRITE(R600_CP_RB_CNTL,
+#ifdef __BIG_ENDIAN
+                    R600_BUF_SWAP_32BIT |
+#endif
                     R600_RB_NO_UPDATE |
                     R600_RB_BLKSZ(15) |
                     R600_RB_BUFSZ(3));
        r600_do_cp_stop(dev_priv);
 
        RADEON_WRITE(R600_CP_RB_CNTL,
+#ifdef __BIG_ENDIAN
+                    R600_BUF_SWAP_32BIT |
+#endif
                     R600_RB_NO_UPDATE |
-                    (15 << 8) |
-                    (3 << 0));
+                    R600_RB_BLKSZ(15) |
+                    R600_RB_BUFSZ(3));
 
        RADEON_WRITE(R600_GRBM_SOFT_RESET, R600_SOFT_RESET_CP);
        RADEON_READ(R600_GRBM_SOFT_RESET);
 
        if (!dev_priv->writeback_works) {
                /* Disable writeback to avoid unnecessary bus master transfer */
-               RADEON_WRITE(R600_CP_RB_CNTL, RADEON_READ(R600_CP_RB_CNTL) |
-                            RADEON_RB_NO_UPDATE);
+               RADEON_WRITE(R600_CP_RB_CNTL,
+#ifdef __BIG_ENDIAN
+                            R600_BUF_SWAP_32BIT |
+#endif
+                            RADEON_READ(R600_CP_RB_CNTL) |
+                            R600_RB_NO_UPDATE);
                RADEON_WRITE(R600_SCRATCH_UMSK, 0);
        }
 }
 
        RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
        cp_rb_cntl = RADEON_READ(R600_CP_RB_CNTL);
-       RADEON_WRITE(R600_CP_RB_CNTL, R600_RB_RPTR_WR_ENA);
+       RADEON_WRITE(R600_CP_RB_CNTL,
+#ifdef __BIG_ENDIAN
+                    R600_BUF_SWAP_32BIT |
+#endif
+                    R600_RB_RPTR_WR_ENA);
 
        RADEON_WRITE(R600_CP_RB_RPTR_WR, cp_ptr);
        RADEON_WRITE(R600_CP_RB_WPTR, cp_ptr);
                        + dev_priv->gart_vm_start;
        }
        RADEON_WRITE(R600_CP_RB_RPTR_ADDR,
-                    rptr_addr & 0xffffffff);
+#ifdef __BIG_ENDIAN
+                    (2 << 0) |
+#endif
+                    (rptr_addr & 0xfffffffc));
        RADEON_WRITE(R600_CP_RB_RPTR_ADDR_HI,
                     upper_32_bits(rptr_addr));
 
        {
                u64 scratch_addr;
 
-               scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR);
+               scratch_addr = RADEON_READ(R600_CP_RB_RPTR_ADDR) & 0xFFFFFFFC;
                scratch_addr |= ((u64)RADEON_READ(R600_CP_RB_RPTR_ADDR_HI)) << 32;
                scratch_addr += R600_SCRATCH_REG_OFFSET;
                scratch_addr >>= 8;
 
 #define R600_CP_RB_CNTL                                        0xc104
 #       define R600_RB_BUFSZ(x)                                ((x) << 0)
 #       define R600_RB_BLKSZ(x)                                ((x) << 8)
+#      define R600_BUF_SWAP_32BIT                             (2 << 16)
 #       define R600_RB_NO_UPDATE                               (1 << 27)
 #       define R600_RB_RPTR_WR_ENA                             (1 << 31)
 #define R600_CP_RB_RPTR_WR                                     0xc108