                .iommu_dev      = smmu->dev,
        };
 
+       if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
+               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops)
                return -ENOMEM;
 
                .iommu_dev      = smmu->dev,
        };
 
+       if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+               pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+
        smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
 
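Both driver hunks follow the same pattern: the quirk is set only when the SMMU is known to perform cache-coherent table walks, so non-coherent systems keep the explicit DMA maintenance. A minimal sketch of that decision for a hypothetical io-pgtable user (my_smmu, MY_FEAT_COHERENT_WALK and my_tlb_ops are illustrative names, not part of this patch):

        struct io_pgtable_ops *pgtbl_ops;
        struct io_pgtable_cfg pgtbl_cfg = {
                .pgsize_bitmap  = my_smmu->pgsize_bitmap,
                .ias            = 48,
                .oas            = 48,
                .tlb            = &my_tlb_ops,
                .iommu_dev      = my_smmu->dev,
        };

        /* Only safe when the walker snoops the CPU caches. */
        if (my_smmu->features & MY_FEAT_COHERENT_WALK)
                pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NO_DMA;

        pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, my_smmu);
        if (!pgtbl_ops)
                return -ENOMEM;
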
 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
                                   struct arm_v7s_io_pgtable *data)
 {
-       struct device *dev = data->iop.cfg.iommu_dev;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       struct device *dev = cfg->iommu_dev;
        dma_addr_t dma;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
        void *table = NULL;
                table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size));
        else if (lvl == 2)
                table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA);
-       if (table && !selftest_running) {
+       if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
                dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
 static void __arm_v7s_free_table(void *table, int lvl,
                                 struct arm_v7s_io_pgtable *data)
 {
-       struct device *dev = data->iop.cfg.iommu_dev;
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+       struct device *dev = cfg->iommu_dev;
        size_t size = ARM_V7S_TABLE_SIZE(lvl);
 
-       if (!selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
                                 DMA_TO_DEVICE);
        if (lvl == 1)
 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
                               struct io_pgtable_cfg *cfg)
 {
-       if (selftest_running)
+       if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
                return;
 
        dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
        if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
                            IO_PGTABLE_QUIRK_NO_PERMS |
                            IO_PGTABLE_QUIRK_TLBI_ON_MAP |
-                           IO_PGTABLE_QUIRK_ARM_MTK_4GB))
+                           IO_PGTABLE_QUIRK_ARM_MTK_4GB |
+                           IO_PGTABLE_QUIRK_NO_DMA))
                return NULL;
 
        /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
                .tlb = &dummy_tlb_ops,
                .oas = 32,
                .ias = 32,
-               .quirks = IO_PGTABLE_QUIRK_ARM_NS,
+               .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
                .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
        };
        unsigned int iova, size, iova_start;
 
        if (!pages)
                return NULL;
 
-       if (!selftest_running) {
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
                dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE);
                if (dma_mapping_error(dev, dma))
                        goto out_free;
 static void __arm_lpae_free_pages(void *pages, size_t size,
                                  struct io_pgtable_cfg *cfg)
 {
-       if (!selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages),
                                 size, DMA_TO_DEVICE);
        free_pages_exact(pages, size);
 {
        *ptep = pte;
 
-       if (!selftest_running)
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
                dma_sync_single_for_device(cfg->iommu_dev,
                                           __arm_lpae_dma_addr(ptep),
                                           sizeof(pte), DMA_TO_DEVICE);
        u64 reg;
        struct arm_lpae_io_pgtable *data;
 
-       if (cfg->quirks & ~IO_PGTABLE_QUIRK_ARM_NS)
+       if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
        struct arm_lpae_io_pgtable *data;
 
        /* The NS quirk doesn't apply at stage 2 */
-       if (cfg->quirks)
+       if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
                return NULL;
 
        data = arm_lpae_alloc_pgtable(cfg);
        struct io_pgtable_cfg cfg = {
                .tlb = &dummy_tlb_ops,
                .oas = 48,
+               .quirks = IO_PGTABLE_QUIRK_NO_DMA,
        };
 
        for (i = 0; i < ARRAY_SIZE(pgsize); ++i) {
 
         *      PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
         *      when the SoC is in "4GB mode" and they can only access the high
         *      remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+        *
+        * IO_PGTABLE_QUIRK_NO_DMA: Guarantees that the tables will only ever
+        *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
+        *      software-emulated IOMMU), such that pagetable updates need not
+        *      be treated as explicit DMA data.
         */
        #define IO_PGTABLE_QUIRK_ARM_NS         BIT(0)
        #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
        #define IO_PGTABLE_QUIRK_TLBI_ON_MAP    BIT(2)
        #define IO_PGTABLE_QUIRK_ARM_MTK_4GB    BIT(3)
+       #define IO_PGTABLE_QUIRK_NO_DMA         BIT(4)
        unsigned long                   quirks;
        unsigned long                   pgsize_bitmap;
        unsigned int                    ias;
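
The selftests set the new quirk unconditionally (see the two selftest hunks above): their dummy configs have no real iommu_dev to map against, so skipping the DMA paths via the quirk preserves what the old selftest_running flag did. As a hedged sketch of the check pattern that replaces that flag (simplified from the v7s/LPAE helpers; my_pte_sync is an illustrative name, not a function from this patch, and it assumes the 1:1 DMA mapping the table code relies on):

        /* Publish a PTE update to a walker that may not snoop CPU caches. */
        static void my_pte_sync(struct io_pgtable_cfg *cfg, u64 *ptep, u64 pte)
        {
                *ptep = pte;

                if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
                        return; /* coherent walker: the plain store suffices */

                dma_sync_single_for_device(cfg->iommu_dev,
                                           (dma_addr_t)virt_to_phys(ptep),
                                           sizeof(*ptep), DMA_TO_DEVICE);
        }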