const struct fb_image *image,
                                    u32 fg, u32 bg)
 {
-       unsigned int src_bytes, dwords;
+       unsigned int dwords;
        u32 *bits;
 
        radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache,
                          rinfo->dp_gui_mc_base |
-                         GMC_BRUSH_NONE |
+                         GMC_BRUSH_NONE | GMC_DST_CLIP_LEAVE |
                          GMC_SRC_DATATYPE_MONO_FG_BG |
                          ROP3_S |
                          GMC_BYTE_ORDER_MSB_TO_LSB |
        radeonfb_set_creg(rinfo, DP_SRC_FRGD_CLR, &rinfo->dp_src_fg_cache, fg);
        radeonfb_set_creg(rinfo, DP_SRC_BKGD_CLR, &rinfo->dp_src_bg_cache, bg);
 
-       radeon_fifo_wait(rinfo, 1);
-       OUTREG(DST_Y_X, (image->dy << 16) | image->dx);
-
        /* Ensure the dst cache is flushed and the engine idle before
         * issuing the operation.
         *
 
        /* X here pads width to a multiple of 32 and uses the clipper to
         * adjust the result. Is that really necessary ? Things seem to
-        * work ok for me without that and the doco doesn't seem to imply
+        * work ok for me without that and the doco doesn't seem to imply
         * there is such a restriction.
         */
-       OUTREG(DST_WIDTH_HEIGHT, (image->width << 16) | image->height);
+       radeon_fifo_wait(rinfo, 4);
+       OUTREG(SC_TOP_LEFT, (image->dy << 16) | image->dx);
+       OUTREG(SC_BOTTOM_RIGHT, ((image->dy + image->height) << 16) |
+              (image->dx + image->width));
+       OUTREG(DST_Y_X, (image->dy << 16) | image->dx);
+
+       OUTREG(DST_HEIGHT_WIDTH, (image->height << 16) | ((image->width + 31) & ~31));
 
-       src_bytes = (((image->width * image->depth) + 7) / 8) * image->height;
-       dwords = (src_bytes + 3) / 4;
+       dwords = (image->width + 31) >> 5;
+       dwords *= image->height;
        bits = (u32*)(image->data);
 
        while(dwords >= 8) {
 
        info->fbops = &radeonfb_ops;
        info->screen_base = rinfo->fb_base;
        info->screen_size = rinfo->mapped_vram;
+
        /* Fill fix common fields */
        strlcpy(info->fix.id, rinfo->name, sizeof(info->fix.id));
         info->fix.smem_start = rinfo->fb_base_phys;
         info->fix.mmio_len = RADEON_REGSIZE;
        info->fix.accel = FB_ACCEL_ATI_RADEON;
 
+       /* Allocate colormap */
        fb_alloc_cmap(&info->cmap, 256, 0);
 
+       /* Setup pixmap used for acceleration */
+#define PIXMAP_SIZE    (2048 * 4)
+
+       info->pixmap.addr = kmalloc(PIXMAP_SIZE, GFP_KERNEL);
+       if (!info->pixmap.addr) {
+               printk(KERN_ERR "radeonfb: Failed to allocate pixmap !\n");
+               noaccel = 1;
+               goto bail;
+       }
+       info->pixmap.size = PIXMAP_SIZE;
+       info->pixmap.flags = FB_PIXMAP_SYSTEM;
+       info->pixmap.scan_align = 4;
+       info->pixmap.buf_align = 4;
+       info->pixmap.access_align = 32;
+
+bail:
        if (noaccel)
                info->flags |= FBINFO_HWACCEL_DISABLED;