u64 outbuf_xlat;
        resource_size_t outbuf_size;
        void __iomem *outbuf;
-
+       phys_addr_t out_phys_addr;
+       dma_addr_t dma_dst_addr;
        /* Inbound MW params */
        dma_addr_t inbuf_xlat;
        resource_size_t inbuf_size;
        struct dmaengine_unmap_data *unmap;
        struct device *dma_dev;
        int try = 0, ret = 0;
+       struct perf_peer *peer = pthr->perf->test_peer;
+       void __iomem *vbase;
+       void __iomem *dst_vaddr;
+       dma_addr_t dst_dma_addr;
 
        if (!use_dma) {
                memcpy_toio(dst, src, len);
                                 offset_in_page(dst), len))
                return -EIO;
 
+       vbase = peer->outbuf;
+       dst_vaddr = dst;
+       dst_dma_addr = peer->dma_dst_addr + (dst_vaddr - vbase);
+
        unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT);
        if (!unmap)
                return -ENOMEM;
        }
        unmap->to_cnt = 1;
 
-       unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst),
-               offset_in_page(dst), len, DMA_FROM_DEVICE);
+       unmap->addr[1] = dst_dma_addr;
        if (dma_mapping_error(dma_dev, unmap->addr[1])) {
                ret = -EIO;
                goto err_free_resource;
 {
        struct perf_ctx *perf = pthr->perf;
        dma_cap_mask_t dma_mask;
+       struct perf_peer *peer = pthr->perf->test_peer;
 
        pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
                                 dev_to_node(&perf->ntb->dev));
        if (!pthr->dma_chan) {
                dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
                        pthr->tidx);
-               atomic_dec(&perf->tsync);
-               wake_up(&perf->twait);
-               kfree(pthr->src);
-               return -ENODEV;
+               goto err_free;
        }
+       peer->dma_dst_addr =
+               dma_map_resource(pthr->dma_chan->device->dev,
+                                peer->out_phys_addr, peer->outbuf_size,
+                                DMA_FROM_DEVICE, 0);
+       if (dma_mapping_error(pthr->dma_chan->device->dev,
+                             peer->dma_dst_addr)) {
+               dev_err(pthr->dma_chan->device->dev, "%d: Failed to map DMA addr\n",
+                       pthr->tidx);
+               peer->dma_dst_addr = 0;
+               dma_release_channel(pthr->dma_chan);
+               goto err_free;
+       }
+       dev_dbg(pthr->dma_chan->device->dev, "%d: Map MMIO %pa to DMA addr %pad\n",
+                       pthr->tidx,
+                       &peer->out_phys_addr,
+                       &peer->dma_dst_addr);
 
        atomic_set(&pthr->dma_sync, 0);
-
        return 0;
+
+err_free:
+       atomic_dec(&perf->tsync);
+       wake_up(&perf->twait);
+       kfree(pthr->src);
+       return -ENODEV;
 }
 
 static int perf_run_test(struct perf_thread *pthr)
         * We call it anyway just to be sure of the transfers completion.
         */
        (void)dmaengine_terminate_sync(pthr->dma_chan);
-
-       dma_release_channel(pthr->dma_chan);
+       if (pthr->perf->test_peer->dma_dst_addr)
+               dma_unmap_resource(pthr->dma_chan->device->dev,
+                                  pthr->perf->test_peer->dma_dst_addr,
+                                  pthr->perf->test_peer->outbuf_size,
+                                  DMA_FROM_DEVICE, 0);
+       if (pthr->dma_chan)
+               dma_release_channel(pthr->dma_chan);
 
 no_dma_notify:
        atomic_dec(&perf->tsync);
                pos += scnprintf(buf + pos, buf_size - pos,
                        "\tOut buffer addr 0x%pK\n", peer->outbuf);
 
+               /* %pap takes a phys_addr_t by reference; "[p]" would print verbatim */
+               pos += scnprintf(buf + pos, buf_size - pos,
+                       "\tOut buff phys addr %pap\n", &peer->out_phys_addr);
+
                pos += scnprintf(buf + pos, buf_size - pos,
                        "\tOut buffer size %pa\n", &peer->outbuf_size);
 
        if (!peer->outbuf)
                return -ENOMEM;
 
+       peer->out_phys_addr = phys_addr;
+
        if (max_mw_size && peer->outbuf_size > max_mw_size) {
                peer->outbuf_size = max_mw_size;
                dev_warn(&peer->perf->ntb->dev,