dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
        int cpu;
+       unsigned long flags;
 
        dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
        dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
-                                        DMA_CTRL_ACK);
+       flags = DMA_CTRL_ACK |
+               DMA_COMPL_SRC_UNMAP_SINGLE |
+               DMA_COMPL_DEST_UNMAP_SINGLE;
+       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
 
        if (!tx) {
                dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
        int cpu;
+       unsigned long flags;
 
        dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
        dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
-                                        DMA_CTRL_ACK);
+       flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE;
+       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
 
        if (!tx) {
                dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
        int cpu;
+       unsigned long flags;
 
        dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
        dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
                                DMA_FROM_DEVICE);
-       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
-                                        DMA_CTRL_ACK);
+       flags = DMA_CTRL_ACK;
+       tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags);
 
        if (!tx) {
                dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
 
 static void
 ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
 {
-       /*
-        * yes we are unmapping both _page and _single
-        * alloc'd regions with unmap_page. Is this
-        * *really* that bad?
-        */
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
-               pci_unmap_page(ioat_chan->device->pdev,
-                               pci_unmap_addr(desc, dst),
-                               pci_unmap_len(desc, len),
-                               PCI_DMA_FROMDEVICE);
-
-       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
-               pci_unmap_page(ioat_chan->device->pdev,
-                               pci_unmap_addr(desc, src),
-                               pci_unmap_len(desc, len),
-                               PCI_DMA_TODEVICE);
+       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { /* destination: unmap unless the skip flag is set */
+               if (desc->async_tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) /* flag set: use the _single unmap variant */
+                       pci_unmap_single(ioat_chan->device->pdev,
+                                        pci_unmap_addr(desc, dst),
+                                        pci_unmap_len(desc, len),
+                                        PCI_DMA_FROMDEVICE);
+               else /* flag clear: unmap as a page mapping */
+                       pci_unmap_page(ioat_chan->device->pdev,
+                                      pci_unmap_addr(desc, dst),
+                                      pci_unmap_len(desc, len),
+                                      PCI_DMA_FROMDEVICE);
+       }
+
+       if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { /* source: same selection, TODEVICE direction */
+               if (desc->async_tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) /* flag set: use the _single unmap variant */
+                       pci_unmap_single(ioat_chan->device->pdev,
+                                        pci_unmap_addr(desc, src),
+                                        pci_unmap_len(desc, len),
+                                        PCI_DMA_TODEVICE);
+               else /* flag clear: unmap as a page mapping */
+                       pci_unmap_page(ioat_chan->device->pdev,
+                                      pci_unmap_addr(desc, src),
+                                      pci_unmap_len(desc, len),
+                                      PCI_DMA_TODEVICE);
+       }
 }
 
 /**
        int err = 0;
        struct completion cmp;
        unsigned long tmo;
+       unsigned long flags;
 
        src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
        if (!src)
                                 DMA_TO_DEVICE);
        dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
                                  DMA_FROM_DEVICE);
+       flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE;
        tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
-                                                  IOAT_TEST_SIZE, 0);
+                                                  IOAT_TEST_SIZE, flags);
        if (!tx) {
                dev_err(&device->pdev->dev,
                        "Self-test prep failed, disabling\n");
 
  *     dependency chains
  * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s)
  * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s)
+ * @DMA_COMPL_SRC_UNMAP_SINGLE - set to dma-unmap the source buffer(s) as a
+ *     single mapping (if not set, the source is dma-unmapped as a page)
+ * @DMA_COMPL_DEST_UNMAP_SINGLE - set to dma-unmap the destination(s) as a
+ *     single mapping (if not set, the destination is dma-unmapped as a page)
  */
 enum dma_ctrl_flags {
        DMA_PREP_INTERRUPT = (1 << 0),
        DMA_CTRL_ACK = (1 << 1),
        DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2),
        DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3),
+       DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), /* unmap source as single rather than as page */
+       DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), /* unmap destination as single rather than as page */
 };
 
 /**