return ret;
 }
 
+/*
+ * Cache operations depending on function and direction argument, inspired by
+ * https://lkml.org/lkml/2018/5/18/979
+ * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20]
+ * dma-mapping: provide a generic dma-noncoherent implementation)"
+ *
+ *          |   map          ==  for_device     |   unmap     ==  for_cpu
+ *          |----------------------------------------------------------------
+ * TO_DEV   |   writeback        writeback      |   none          none
+ * FROM_DEV |   invalidate       invalidate     |   invalidate*   invalidate*
+ * BIDIR    |   writeback+inv    writeback+inv  |   invalidate    invalidate
+ *
+ *     [*] needed for CPU speculative prefetches
+ *
+ * NOTE: we don't check the validity of direction argument as it is done in
+ * upper layer functions (in include/linux/dma-mapping.h)
+ */
+
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
                size_t size, enum dma_data_direction dir)
 {
-       dma_cache_wback(paddr, size);
+       switch (dir) {
+       case DMA_TO_DEVICE:
+               dma_cache_wback(paddr, size);
+               break;
+
+       case DMA_FROM_DEVICE:
+               dma_cache_inv(paddr, size);
+               break;
+
+       case DMA_BIDIRECTIONAL:
+               dma_cache_wback_inv(paddr, size);
+               break;
+
+       default:
+               break;
+       }
 }
 
 void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
                size_t size, enum dma_data_direction dir)
 {
-       dma_cache_inv(paddr, size);
+       switch (dir) {
+       case DMA_TO_DEVICE:
+               break;
+
+       /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */
+       case DMA_FROM_DEVICE:
+       case DMA_BIDIRECTIONAL:
+               dma_cache_inv(paddr, size);
+               break;
+
+       default:
+               break;
+       }
 }